Want to test if I can import the Procrustes info from MorphoJ to do the PCA here instead.
Got the Procrustes info to import, but couldn't get the multivariate analysis to work here. So, I'll just import the regression residuals and work with those.
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(curl)
## Using libcurl 8.3.0 with Schannel
##
## Attaching package: 'curl'
##
## The following object is masked from 'package:readr':
##
## parse_date
# Pull the MorphoJ multivariate-regression residuals (tab-separated) from GitHub.
test.df <- curl("https://raw.githubusercontent.com/allisondavis/morphology_analysis/refs/heads/master/MV%20regression%2C%20results.txt")
test.df <- read.table(test.df, header = TRUE, sep = "\t")
##### PCA
# PCA on columns 7:38 — the 32 RegResid1..RegResid32 residual variables (see
# the loadings table below). scale. = FALSE keeps the covariance matrix, which
# is what MorphoJ uses; scaling is only needed when variables are on
# different measurement scales.
PCA.test <- prcomp(test.df[, 7:38], center = TRUE, scale. = FALSE) #false uses covariance matrix like in MorphoJ; scale=true only useful if variables are measured on different scales
summary(PCA.test)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 0.02836 0.01986 0.01969 0.01378 0.01248 0.01121 0.009914
## Proportion of Variance 0.30299 0.14861 0.14601 0.07156 0.05867 0.04729 0.037020
## Cumulative Proportion 0.30299 0.45160 0.59760 0.66916 0.72783 0.77512 0.812150
## PC8 PC9 PC10 PC11 PC12 PC13
## Standard deviation 0.008759 0.008336 0.00762 0.00651 0.006144 0.005412
## Proportion of Variance 0.028900 0.026170 0.02187 0.01596 0.014220 0.011030
## Cumulative Proportion 0.841040 0.867210 0.88908 0.90504 0.919260 0.930290
## PC14 PC15 PC16 PC17 PC18 PC19
## Standard deviation 0.005218 0.005002 0.004721 0.004455 0.003892 0.003739
## Proportion of Variance 0.010260 0.009420 0.008390 0.007470 0.005710 0.005270
## Cumulative Proportion 0.940550 0.949980 0.958370 0.965850 0.971550 0.976820
## PC20 PC21 PC22 PC23 PC24 PC25
## Standard deviation 0.00333 0.003231 0.00321 0.002557 0.002468 0.002408
## Proportion of Variance 0.00418 0.003930 0.00388 0.002460 0.002290 0.002180
## Cumulative Proportion 0.98099 0.984930 0.98881 0.991270 0.993560 0.995750
## PC26 PC27 PC28 PC29 PC30 PC31
## Standard deviation 0.002136 0.002034 0.00161 2.896e-16 2.126e-16 2.73e-17
## Proportion of Variance 0.001720 0.001560 0.00098 0.000e+00 0.000e+00 0.00e+00
## Cumulative Proportion 0.997470 0.999020 1.00000 1.000e+00 1.000e+00 1.00e+00
## PC32
## Standard deviation 1.825e-17
## Proportion of Variance 0.000e+00
## Cumulative Proportion 1.000e+00
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
(eig.val.test <- get_eigenvalue(PCA.test))
## eigenvalue variance.percent cumulative.variance.percent
## Dim.1 8.044309e-04 3.029912e+01 30.29912
## Dim.2 3.945433e-04 1.486058e+01 45.15971
## Dim.3 3.876418e-04 1.460064e+01 59.76035
## Dim.4 1.899869e-04 7.155909e+00 66.91626
## Dim.5 1.557587e-04 5.866694e+00 72.78295
## Dim.6 1.255665e-04 4.729499e+00 77.51245
## Dim.7 9.829059e-05 3.702143e+00 81.21459
## Dim.8 7.672005e-05 2.889683e+00 84.10427
## Dim.9 6.948059e-05 2.617006e+00 86.72128
## Dim.10 5.806752e-05 2.187130e+00 88.90841
## Dim.11 4.237375e-05 1.596019e+00 90.50443
## Dim.12 3.775063e-05 1.421888e+00 91.92632
## Dim.13 2.928853e-05 1.103161e+00 93.02948
## Dim.14 2.723190e-05 1.025697e+00 94.05518
## Dim.15 2.502220e-05 9.424683e-01 94.99764
## Dim.16 2.228727e-05 8.394564e-01 95.83710
## Dim.17 1.984478e-05 7.474592e-01 96.58456
## Dim.18 1.514869e-05 5.705797e-01 97.15514
## Dim.19 1.398100e-05 5.265985e-01 97.68174
## Dim.20 1.108613e-05 4.175621e-01 98.09930
## Dim.21 1.044038e-05 3.932397e-01 98.49254
## Dim.22 1.030651e-05 3.881977e-01 98.88074
## Dim.23 6.537065e-06 2.462204e-01 99.12696
## Dim.24 6.089258e-06 2.293536e-01 99.35631
## Dim.25 5.797141e-06 2.183509e-01 99.57466
## Dim.26 4.563074e-06 1.718695e-01 99.74653
## Dim.27 4.138116e-06 1.558633e-01 99.90240
## Dim.28 2.591368e-06 9.760463e-02 100.00000
## Dim.29 8.389200e-32 3.159816e-27 100.00000
## Dim.30 4.520650e-32 1.702716e-27 100.00000
## Dim.31 7.455525e-34 2.808145e-29 100.00000
## Dim.32 3.331721e-34 1.254902e-29 100.00000
# Per-specimen PC scores from factoextra.
ind.test <- get_pca_ind(PCA.test)
head(ind.test$coord[,1:4])
## Dim.1 Dim.2 Dim.3 Dim.4
## 1 0.01340669 0.0078857358 0.014029153 -0.0106578735
## 2 0.01342228 -0.0223168798 0.022468552 -0.0050787697
## 3 -0.02748104 -0.0096209523 0.014553590 0.0091966388
## 4 -0.02001009 0.0003470414 0.008558898 0.0037383369
## 5 -0.03116908 0.0111344766 0.013840877 0.0051050439
## 6 -0.01375579 0.0166512862 0.004231201 -0.0008373614
# Append the first four PC scores (Dim.1-Dim.4) to the data for the
# per-species t-tests and Levene tests below.
test.df <- cbind(test.df, ind.test$coord[,1:4])
# Variable loadings (rotation matrix) for PC1-PC5; outer () auto-prints.
(loadings <- PCA.test$rotation[, 1:5])
## PC1 PC2 PC3 PC4 PC5
## RegResid1 0.135008248 -0.110107210 0.16716573 -0.274849465 0.052275630
## RegResid2 0.155310001 -0.133546894 -0.01466466 0.105705695 -0.190927188
## RegResid3 0.263422372 0.006700256 -0.82407265 0.042422404 0.259518193
## RegResid4 0.148360094 0.156240124 -0.16632659 0.198769756 0.070899948
## RegResid5 -0.670432329 -0.505275356 -0.17727123 0.232038069 -0.095963475
## RegResid6 0.174401325 0.117100951 0.15249628 0.370790715 0.143331415
## RegResid7 0.205954566 0.026521398 0.18463118 0.029099777 0.115944040
## RegResid8 0.029040819 -0.029517028 0.02861223 0.282470957 0.037279697
## RegResid9 -0.079230864 0.183352491 -0.02246982 -0.238692956 0.169871081
## RegResid10 0.100564008 0.002877471 -0.10554812 0.056717141 -0.297849545
## RegResid11 -0.105055841 0.247292942 0.01872475 -0.026959085 -0.001853964
## RegResid12 0.059706631 -0.090181853 -0.12731485 -0.093271461 -0.238773394
## RegResid13 -0.042277217 0.087980640 -0.13963359 -0.340711764 -0.209979248
## RegResid14 0.005249607 -0.188157464 -0.09839363 -0.199721166 -0.055348975
## RegResid15 0.143634501 -0.185964018 0.12708402 0.054968122 0.139445654
## RegResid16 -0.072327608 -0.031638025 0.09710973 -0.025205391 0.369938210
## RegResid17 0.188160260 -0.387448897 0.13142518 -0.018493539 0.196717211
## RegResid18 -0.158585258 0.035307617 0.06818033 -0.078120936 0.345505236
## RegResid19 -0.003795688 -0.134926056 0.06605121 0.099567189 0.097667438
## RegResid20 -0.363086378 0.352140160 -0.05579576 -0.145489784 0.136992213
## RegResid21 -0.145276979 0.213625337 0.09734773 0.425220615 0.077901415
## RegResid22 -0.044727402 0.001069053 0.00684746 -0.188717407 0.008350602
## RegResid23 -0.030163021 0.231249783 0.04663237 -0.079076106 0.016419621
## RegResid24 -0.011025942 -0.098728303 0.06183703 -0.049875094 0.024288123
## RegResid25 0.023045499 0.169496081 0.02669218 0.143853048 -0.326092317
## RegResid26 -0.122714633 0.038725202 0.01041427 -0.024921153 -0.034698824
## RegResid27 -0.038783243 0.209659503 0.01854588 0.157356674 -0.317146993
## RegResid28 -0.119849240 0.035390979 0.06277723 -0.168120638 -0.019423858
## RegResid29 0.074531941 -0.035206773 0.15266214 -0.174676652 -0.067499447
## RegResid30 0.118234295 -0.083421229 0.04286607 0.006064492 -0.110964461
## RegResid31 0.081257793 -0.016950123 0.12648494 -0.031066332 -0.107224840
## RegResid32 0.101449680 -0.083660762 0.03690298 -0.047075726 -0.188599199
# Dot charts of the sorted variable loadings for PC1-PC3.
# One helper replaces the three copy-pasted blocks (which also kept
# re-assigning scratch variables myTitle/myXlab). The helper returns the
# sorted loadings invisibly so the original sorted.loadings.N objects are
# still created for any downstream use.
plot_sorted_loadings <- function(pc) {
  s <- loadings[order(loadings[, pc]), pc]
  dotchart(s,
           main = paste0("Loadings Plot for PC", pc),
           xlab = "Variable Loadings",
           cex = 1.5, col = "red")
  invisible(s)
}
sorted.loadings.1 <- plot_sorted_loadings(1)
sorted.loadings.2 <- plot_sorted_loadings(2)
sorted.loadings.3 <- plot_sorted_loadings(3)
##### POST PCA TESTS
# Split rows by species so PC scores can be compared between the two groups.
lat.p <- test.df[test.df$Species == "p.latipinna",]
form.p <- test.df[test.df$Species == "p.formosa",]
# Welch two-sample t-test: do the species differ in mean PC1 score?
(pc1 <- t.test(lat.p$Dim.1, form.p$Dim.1, alternative = "two.sided"))
##
## Welch Two Sample t-test
##
## data: lat.p$Dim.1 and form.p$Dim.1
## t = 24.956, df = 303.49, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.03970850 0.04650662
## sample estimates:
## mean of x mean of y
## 0.02497059 -0.01813697
(pc2 <- t.test(lat.p$Dim.2, form.p$Dim.2, alternative = "two.sided"))
##
## Welch Two Sample t-test
##
## data: lat.p$Dim.2 and form.p$Dim.2
## t = -0.48969, df = 277.42, p-value = 0.6247
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.005099263 0.003067681
## sample estimates:
## mean of x mean of y
## 0.001469468 0.002485259
(pc3 <- t.test(lat.p$Dim.3, form.p$Dim.3, alternative = "two.sided"))
##
## Welch Two Sample t-test
##
## data: lat.p$Dim.3 and form.p$Dim.3
## t = 4.4683, df = 303.32, p-value = 1.115e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.005281476 0.013594355
## sample estimates:
## mean of x mean of y
## 0.005540339 -0.003897577
(pc1V <- leveneTest(Dim.1~Species, data=test.df))
## Warning in leveneTest.default(y = y, group = group, ...): group coerced to
## factor.
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 2 0.3452 0.7084
## 337
(pc2V <- leveneTest(Dim.2~Species, data=test.df))
## Warning in leveneTest.default(y = y, group = group, ...): group coerced to
## factor.
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 2 7.5716 0.0006074 ***
## 337
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(pc3V <- leveneTest(Dim.3~Species, data=test.df))
## Warning in leveneTest.default(y = y, group = group, ...): group coerced to
## factor.
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 2 4.3582 0.01353 *
## 337
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##### PLOTS
library(AMR)
library(ggplot2)
library(ggfortify)
library(ggbiplot)
##
## Attaching package: 'ggbiplot'
##
## The following object is masked from 'package:ggfortify':
##
## ggbiplot
# Species colour palette. The original defined wes.yel/wes.blu but never used
# them, repeating the hex strings inline four times; the palette vector is now
# used everywhere.
wes.yel <- "#E1AF00"
wes.blu <- "#78B7C5"
wes.pur <- "#9986A5"
spp.cols <- c(wes.yel, wes.blu, wes.pur)
# Shared builder for the PCA score plots: points coloured by species with
# normal-probability ellipses; px/py choose which PCs go on the axes.
pca_plot <- function(px, py) {
  autoplot(PCA.test, x = px, y = py, data = test.df, colour = "Species",
           loadings = FALSE, loadings.label = FALSE,
           frame = TRUE, frame.type = "norm") +
    scale_color_manual(values = spp.cols) +
    scale_fill_manual(values = spp.cols) +
    ggtitle("PCA Plot of Morphology traits") +
    theme_classic()
}
plot1 <- pca_plot(1, 2)  # PC1 vs PC2 (autoplot's defaults)
plot1
plot2 <- pca_plot(2, 3)  # PC2 vs PC3
plot2
Purpose: Briefly outline the purpose of your experiment.
Hypothesis: Outline the hypotheses if applicable.
Predictions: - Prediction 1 (if applicable).
Data: Briefly describe the type of data collected.
To get digital distances: The TPS coordinates were in a long format, and I thought it would be easiest to locate the values needed to insert into the euclidean distance formula with a wide format. So, I first cleaned up the original TPS file–I removed all curve data, and unnecessary rows (ie. LM=16, image=) to be left with a column of IDs, a column of landmark labels, and two columns for the coordinates.
Importing that into R, I then used the wider fxn to create one column for IDs, 16 columns for the X coordinates of all the landmarks, and 16 columns for all the y coordinates of all the landmarks (originally wanted it to be a coordinates column, so there would be two rows for each specimen to minimize column numbers, but whatever, this works). I separated this into two dataframes, one with just the X coords and one with just the Y, that way it would be easier to call.
I calculated the euclidean distance (formula: sqrt((X_LM2 - X_LM1)^2 + (Y_LM2 - Y_LM1)^2)) for most of the same measurements (only missing total length and head width, as these were not possible in the digital photos). For head depth, I wasn't sure if 2->11 or 2->12 was closer to what I measured by hand; same for body depth, I wasn't sure if it was 3->10 or 3->11. I included both.
finished all of the calculations and added the values into a dataframe with the IDs from the original tps file. Exported it to excel
Once in excel, I realized that the values I had were in pixels (I think?) rather than mm, so I needed to find a way to convert in order to compare to the hand measurements. I found out that the scale factor (included in the original tps file) is mm/pixel, so I added this to the excel, and multiplied it to each value to get the mm. However, it was a bit off (eg 4.5 rather than 45) so I multiplied it by 10 to get to a number that resembled the hand measurements. Not 100% sure if this was correct, or why I would need to multiply by 10, but going with it for now.
library(tidyverse)
library(curl)
# Landmark coordinates (long format: one row per landmark) from the digitised
# TPS data.
raw <- curl("https://raw.githubusercontent.com/allisondavis/morphology_analysis/refs/heads/master/dig-dist.csv")
raw <- read.csv(raw, header = TRUE, sep = ",", stringsAsFactors = TRUE)
# Widen to one row per specimen: 16 X-coordinate and 16 Y-coordinate columns.
raw1 <- raw %>%
pivot_wider(names_from = LABELS, values_from = c(X.COR, Y.COR))
# Split into X-only and Y-only tables (ID kept in each) for easier lookup.
x <- raw1[ , 1:17]
y <- raw1[, c(1, 18:33)]
# Euclidean distance between two digitised landmarks a and b, looked up in the
# wide X/Y coordinate tables built above. Replaces twelve copy-pasted
# formulas; operand order does not matter because the differences are squared.
lm_dist <- function(a, b) {
  sqrt((x[[paste0("X.COR_LM", a)]] - x[[paste0("X.COR_LM", b)]])^2 +
       (y[[paste0("Y.COR_LM", a)]] - y[[paste0("Y.COR_LM", b)]])^2)
}
HL.dig    <- lm_dist(2, 1)    # head length
SL.dig    <- lm_dist(6, 1)    # standard length
PreDL.dig <- lm_dist(3, 1)    # pre-dorsal length
DbL.dig   <- lm_dist(4, 3)    # dorsal-fin base length
CPD.dig   <- lm_dist(7, 5)    # caudal peduncle depth
HD1.dig   <- lm_dist(11, 2)   # head depth, option 1 (2->11)
HD2.dig   <- lm_dist(12, 2)   # head depth, option 2 (2->12)
CPL.dig   <- lm_dist(8, 6)    # caudal peduncle length
BD1.dig   <- lm_dist(3, 10)   # body depth, option 1 (3->10)
BD2.dig   <- lm_dist(3, 11)   # body depth, option 2 (3->11)
SnL.dig   <- lm_dist(15, 1)   # snout length
OL.dig    <- lm_dist(16, 15)  # orbit length
# Assemble IDs plus all distances into one data frame for export.
raw2 <- cbind(raw1[,1], SL.dig, BD1.dig, BD2.dig, CPD.dig, CPL.dig, PreDL.dig, DbL.dig, HL.dig, HD1.dig, HD2.dig, SnL.dig, OL.dig)
library(writexl)
write_xlsx(raw2, 'C:/Users/allis/OneDrive/Desktop/comparison_data2.xlsx')
Performed most analyses in MorphoJ. Analyses here will strictly be for comparing digital vs hand measurements.
library(tidyverse)
library(curl)
raw <- curl("https://raw.githubusercontent.com/allisondavis/morphology_analysis/refs/heads/master/comparison_data.csv")
raw <- read.csv(raw, header = TRUE, sep = ",", stringsAsFactors = TRUE)
# AD20-082 is missing all manual values, so remove it to avoid issues with NAs.
# NOTE(review): the original comment said AD20-084, but the code here — and
# the later long-format cleanup, which also drops AD20-082 — both use
# AD20-082, so the comment appears to have been a typo. Confirm which
# specimen actually lacks hand measurements.
raw<- raw[raw$ID !="AD20-082", ]
I will use Shapiro-wilk, histograms, and QQ plots to determine what traits are normal.
Will separate my raw dataset into the two factors of interest: digital and manual measurements.
library(dplyr)
# Keep ID, species, and only the digital (.dig-suffixed) measurement columns.
# NOTE(review): columns retain their ".dig"/".man" suffixes, so the later
# calls like digital$SL rely on data.frame `$` partial matching (SL ->
# SL.dig). This works because each short name is a unique prefix, but it is
# fragile — consider renaming or using full column names.
digital <- raw %>%
select(ID, SPP, ends_with(".dig"))
# Keep ID, species, and only the hand-measured (.man) columns.
manual <- raw %>%
select(ID, SPP, ends_with(".man"))
All but OL fail SW test with a slight skew/deviation to the right.
##### Shapiro-Wilk test #####
shapiro.test(digital$SL)
##
## Shapiro-Wilk normality test
##
## data: digital$SL
## W = 0.97473, p-value = 1.165e-05
shapiro.test(digital$BD1)
##
## Shapiro-Wilk normality test
##
## data: digital$BD1
## W = 0.98196, p-value = 0.0002967
shapiro.test(digital$BD2)
##
## Shapiro-Wilk normality test
##
## data: digital$BD2
## W = 0.9907, p-value = 0.03071
shapiro.test(digital$CPD)
##
## Shapiro-Wilk normality test
##
## data: digital$CPD
## W = 0.96472, p-value = 2.6e-07
shapiro.test(digital$CPL)
##
## Shapiro-Wilk normality test
##
## data: digital$CPL
## W = 0.95444, p-value = 9.409e-09
shapiro.test(digital$PreDL)
##
## Shapiro-Wilk normality test
##
## data: digital$PreDL
## W = 0.98017, p-value = 0.0001268
shapiro.test(digital$DbL)
##
## Shapiro-Wilk normality test
##
## data: digital$DbL
## W = 0.9864, p-value = 0.002823
shapiro.test(digital$HL)
##
## Shapiro-Wilk normality test
##
## data: digital$HL
## W = 0.95243, p-value = 5.184e-09
shapiro.test(digital$HD1)
##
## Shapiro-Wilk normality test
##
## data: digital$HD1
## W = 0.9765, p-value = 2.463e-05
shapiro.test(digital$HD2)
##
## Shapiro-Wilk normality test
##
## data: digital$HD2
## W = 0.97148, p-value = 3.143e-06
shapiro.test(digital$SnL)
##
## Shapiro-Wilk normality test
##
## data: digital$SnL
## W = 0.97219, p-value = 4.145e-06
shapiro.test(digital$OL)
##
## Shapiro-Wilk normality test
##
## data: digital$OL
## W = 0.99609, p-value = 0.5705
##### Histograms #####
# Histogram (30 bins) of every digital trait, four panels per page.
# main/xlab are set explicitly to reproduce the default "digital$SL"-style
# labels the one-call-per-trait version produced.
par(mfcol=c(2,2), oma = c(0,0,2,0))
for (trait in c("SL", "BD1", "BD2", "CPD", "CPL", "PreDL",
                "DbL", "HL", "HD1", "HD2", "SnL", "OL")) {
  hist(digital[[paste0(trait, ".dig")]], breaks = 30,
       main = paste0("Histogram of digital$", trait),
       xlab = paste0("digital$", trait))
}
##### QQ plots #####
# Normal Q-Q plot plus reference line for every digital trait.
par(mfcol=c(2,2), oma = c(0,0,2,0))
for (trait in c("SL", "BD1", "BD2", "CPD", "CPL", "PreDL",
                "DbL", "HL", "HD1", "HD2", "SnL", "OL")) {
  vals <- digital[[paste0(trait, ".dig")]]
  qqnorm(vals)
  qqline(vals)
}
All fail the SW test, with a slight skew/deviation to the right.
##### Shapiro-Wilk test #####
shapiro.test(manual$SL)
##
## Shapiro-Wilk normality test
##
## data: manual$SL
## W = 0.98036, p-value = 0.0001385
shapiro.test(manual$BD1)
##
## Shapiro-Wilk normality test
##
## data: manual$BD1
## W = 0.96494, p-value = 2.806e-07
shapiro.test(manual$CPD)
##
## Shapiro-Wilk normality test
##
## data: manual$CPD
## W = 0.97037, p-value = 2.046e-06
shapiro.test(manual$CPL)
##
## Shapiro-Wilk normality test
##
## data: manual$CPL
## W = 0.97427, p-value = 9.619e-06
shapiro.test(manual$PreDL)
##
## Shapiro-Wilk normality test
##
## data: manual$PreDL
## W = 0.98037, p-value = 0.0001391
shapiro.test(manual$DbL)
##
## Shapiro-Wilk normality test
##
## data: manual$DbL
## W = 0.98347, p-value = 0.00062
shapiro.test(manual$HL)
##
## Shapiro-Wilk normality test
##
## data: manual$HL
## W = 0.97032, p-value = 2.009e-06
shapiro.test(manual$HD1)
##
## Shapiro-Wilk normality test
##
## data: manual$HD1
## W = 0.98378, p-value = 0.0007249
shapiro.test(manual$SnL)
##
## Shapiro-Wilk normality test
##
## data: manual$SnL
## W = 0.97615, p-value = 2.113e-05
shapiro.test(manual$OL)
##
## Shapiro-Wilk normality test
##
## data: manual$OL
## W = 0.98828, p-value = 0.007818
##### Histograms #####
# Histogram (30 bins) of every hand-measured trait, four panels per page.
# main/xlab reproduce the default "manual$SL"-style labels.
par(mfcol=c(2,2), oma = c(0,0,2,0))
for (trait in c("SL", "BD1", "CPD", "CPL", "PreDL",
                "DbL", "HL", "HD1", "SnL", "OL")) {
  hist(manual[[paste0(trait, ".man")]], breaks = 30,
       main = paste0("Histogram of manual$", trait),
       xlab = paste0("manual$", trait))
}
##### QQ plots #####
# Normal Q-Q plot plus reference line for every hand-measured trait.
par(mfcol=c(2,2), oma = c(0,0,2,0))
for (trait in c("SL", "BD1", "CPD", "CPL", "PreDL",
                "DbL", "HL", "HD1", "SnL", "OL")) {
  vals <- manual[[paste0(trait, ".man")]]
  qqnorm(vals)
  qqline(vals)
}
While some values are still significant, log transformations vastly improve normality (eg p=2e-16 to p=0.002). The histograms and QQ plots look pretty damn normal, so I will stick with log transformed values.
Log transformed data sets
# Natural-log transform of the trait columns (cols 1-2 are ID and SPP).
# digital carries 12 ".dig" traits in columns 3:14; manual carries 10 ".man"
# traits in columns 3:12. The redundant c() wrappers around the ranges are
# dropped.
dig_trans <- digital
dig_trans[, 3:14] <- log(dig_trans[, 3:14])
man_trans <- manual
man_trans[, 3:12] <- log(man_trans[, 3:12])
OL was the only one that was normal in raw check, but since it was not normal in manual, and I want to compare dig to man, I will transform the OL here too (don’t want to compare raw to transformed measurements).
##### Shapiro-Wilk test #####
shapiro.test(dig_trans$SL)
##
## Shapiro-Wilk normality test
##
## data: dig_trans$SL
## W = 0.99403, p-value = 0.2055
shapiro.test(dig_trans$BD1)
##
## Shapiro-Wilk normality test
##
## data: dig_trans$BD1
## W = 0.9938, p-value = 0.1809
shapiro.test(dig_trans$BD2)
##
## Shapiro-Wilk normality test
##
## data: dig_trans$BD2
## W = 0.98978, p-value = 0.0181
shapiro.test(dig_trans$CPD)
##
## Shapiro-Wilk normality test
##
## data: dig_trans$CPD
## W = 0.99187, p-value = 0.06017
shapiro.test(dig_trans$CPL)
##
## Shapiro-Wilk normality test
##
## data: dig_trans$CPL
## W = 0.99116, p-value = 0.03989
shapiro.test(dig_trans$PreDL)
##
## Shapiro-Wilk normality test
##
## data: dig_trans$PreDL
## W = 0.99151, p-value = 0.04877
shapiro.test(dig_trans$DbL)
##
## Shapiro-Wilk normality test
##
## data: dig_trans$DbL
## W = 0.98425, p-value = 0.0009222
shapiro.test(dig_trans$HL)
##
## Shapiro-Wilk normality test
##
## data: dig_trans$HL
## W = 0.99129, p-value = 0.04319
shapiro.test(dig_trans$HD1)
##
## Shapiro-Wilk normality test
##
## data: dig_trans$HD1
## W = 0.99687, p-value = 0.758
shapiro.test(dig_trans$HD2)
##
## Shapiro-Wilk normality test
##
## data: dig_trans$HD2
## W = 0.99479, p-value = 0.3068
shapiro.test(dig_trans$SnL)
##
## Shapiro-Wilk normality test
##
## data: dig_trans$SnL
## W = 0.99732, p-value = 0.8576
shapiro.test(dig_trans$OL)
##
## Shapiro-Wilk normality test
##
## data: dig_trans$OL
## W = 0.98989, p-value = 0.01936
##### Histograms #####
# Histogram (30 bins) of every log-transformed digital trait.
# main/xlab reproduce the default "dig_trans$SL"-style labels.
par(mfcol=c(2,2), oma = c(0,0,2,0))
for (trait in c("SL", "BD1", "BD2", "CPD", "CPL", "PreDL",
                "DbL", "HL", "HD1", "HD2", "SnL", "OL")) {
  hist(dig_trans[[paste0(trait, ".dig")]], breaks = 30,
       main = paste0("Histogram of dig_trans$", trait),
       xlab = paste0("dig_trans$", trait))
}
##### QQ plots #####
# Normal Q-Q plot plus reference line for every log-transformed digital trait.
par(mfcol=c(2,2), oma = c(0,0,2,0))
for (trait in c("SL", "BD1", "BD2", "CPD", "CPL", "PreDL",
                "DbL", "HL", "HD1", "HD2", "SnL", "OL")) {
  vals <- dig_trans[[paste0(trait, ".dig")]]
  qqnorm(vals)
  qqline(vals)
}
##### Shapiro-Wilk test #####
shapiro.test(man_trans$SL)
##
## Shapiro-Wilk normality test
##
## data: man_trans$SL
## W = 0.99424, p-value = 0.2297
shapiro.test(man_trans$BD1)
##
## Shapiro-Wilk normality test
##
## data: man_trans$BD1
## W = 0.99567, p-value = 0.4747
shapiro.test(man_trans$CPD)
##
## Shapiro-Wilk normality test
##
## data: man_trans$CPD
## W = 0.99598, p-value = 0.5428
shapiro.test(man_trans$CPL)
##
## Shapiro-Wilk normality test
##
## data: man_trans$CPL
## W = 0.98966, p-value = 0.01693
shapiro.test(man_trans$PreDL)
##
## Shapiro-Wilk normality test
##
## data: man_trans$PreDL
## W = 0.98692, p-value = 0.003722
shapiro.test(man_trans$DbL)
##
## Shapiro-Wilk normality test
##
## data: man_trans$DbL
## W = 0.98849, p-value = 0.008812
shapiro.test(man_trans$HL)
##
## Shapiro-Wilk normality test
##
## data: man_trans$HL
## W = 0.99081, p-value = 0.03262
shapiro.test(man_trans$HD1)
##
## Shapiro-Wilk normality test
##
## data: man_trans$HD1
## W = 0.99264, p-value = 0.09385
shapiro.test(man_trans$SnL)
##
## Shapiro-Wilk normality test
##
## data: man_trans$SnL
## W = 0.99033, p-value = 0.02488
shapiro.test(man_trans$OL)
##
## Shapiro-Wilk normality test
##
## data: man_trans$OL
## W = 0.97981, p-value = 0.0001072
##### Histograms #####
# Histogram (30 bins) of every log-transformed hand-measured trait.
# main/xlab reproduce the default "man_trans$SL"-style labels.
par(mfcol=c(2,2), oma = c(0,0,2,0))
for (trait in c("SL", "BD1", "CPD", "CPL", "PreDL",
                "DbL", "HL", "HD1", "SnL", "OL")) {
  hist(man_trans[[paste0(trait, ".man")]], breaks = 30,
       main = paste0("Histogram of man_trans$", trait),
       xlab = paste0("man_trans$", trait))
}
##### QQ plots #####
# Normal Q-Q plot plus reference line for every log-transformed manual trait.
par(mfcol=c(2,2), oma = c(0,0,2,0))
for (trait in c("SL", "BD1", "CPD", "CPL", "PreDL",
                "DbL", "HL", "HD1", "SnL", "OL")) {
  vals <- man_trans[[paste0(trait, ".man")]]
  qqnorm(vals)
  qqline(vals)
}
I will perform 3 different analyses: ANOVA, Bland-Altman and PCA. These vary in whether they need long or wide formats. Also, the raw data contains columns that are not needed for these analyses (eg location or photo date) so I will remove those to leave just the species, tag ID, and measurements.
Long format
library(tidyr)
#log transform entire data set (columns 3:24 are the 22 measurement columns)
raw2 <- raw
raw2[, c(3:24)] <- log(raw2[, c(3:24)])
# this will create a data frame with species, ID, characteristic, method and value
# Column names like "SL.dig" split on the dot into characteristic ("SL") and
# method ("dig"/"man").
df_long <- raw2 %>%
pivot_longer(
cols = ends_with(".dig") | ends_with(".man"),
names_to = c("characteristic", "method"),
names_sep = "\\.",
values_to = "value"
)
# method and characteristic are initially a character; need to be a factor
df_long$method <- as.factor(df_long$method)
df_long$characteristic <- as.factor(df_long$characteristic)
# one fish (AD20-082) is missing hand measurements; will remove
# NOTE(review): AD20-082 was already dropped from `raw` above, so this filter
# is a harmless no-op safeguard here.
df_long <- df_long[df_long$ID !="AD20-082", ]
#create data frames that only have one measurement for BD and HD
## BD1 and HD1 for both digital and manual measurements
df_long2 <- df_long[-grep("BD2", df_long$characteristic),]
df_long2 <- df_long2[-grep("HD2", df_long2$characteristic),]#only contains BD1 & HD1
## BD1/HD1 for manual (since they only have one) and BD2/HD2 for digital
df_long3 <- df_long %>%
filter(
# Keep everything for Hand measurements
method == "man" |
# Keep all digital measurements except BD1 and HD1
!(method == "dig" & characteristic %in% c("BD1", "HD1"))
)
#to ensure comparisons between BD2 and BD1 in this new data frame, will combine characteristic names to just BD or HD
# NOTE(review): case_when mixes a string RHS ("BD"/"HD") with the factor
# column in the TRUE branch; with current dplyr/vctrs this coerces
# `characteristic` to character rather than keeping it a factor — confirm
# downstream code does not depend on it being a factor.
df_long3<- df_long3 %>%
mutate(
characteristic = case_when(
characteristic %in% c("BD1", "BD2") ~ "BD",
characteristic %in% c("HD1", "HD2") ~ "HD",
TRUE ~ characteristic # Leave other characteristics unchanged
)
)
Wide format
# Wide versions of the two long data frames: one column per method
# ("dig" / "man") so digital and hand values sit side by side.
spread_methods <- function(d) {
  pivot_wider(d, names_from = method, values_from = value)
}
df_wide <- spread_methods(df_long2)   # BD1/HD1 for both methods
df_wide2 <- spread_methods(df_long3)  # BD2/HD2 digital vs BD1/HD1 hand
Results: no significant difference between hand or digital measurements when using BD2/HD2; suggestive difference in method when using BD1/HD1 (0.0584).
No effect of species (didn't expect there to be, but just in case).
# NOTE: analysis requires long format
#### for BD1 and HD1
# Two-way ANOVA: does (log) measurement value differ by method (dig vs man),
# species, or their interaction? aov() uses sequential (Type I) sums of
# squares, so term order matters when the design is unbalanced.
anova_results <- aov(value~method * SPP, data = df_long2)
summary(anova_results)
## Df Sum Sq Mean Sq F value Pr(>F)
## method 1 2 2.0122 3.584 0.0584 .
## SPP 2 2 0.9491 1.691 0.1845
## method:SPP 2 1 0.3224 0.574 0.5631
## Residuals 6774 3803 0.5614
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova_results2 <- aov(value~method + SPP, data = df_long2)
summary(anova_results2)
## Df Sum Sq Mean Sq F value Pr(>F)
## method 1 2 2.0122 3.585 0.0584 .
## SPP 2 2 0.9491 1.691 0.1845
## Residuals 6776 3804 0.5613
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova_results3 <- aov(value~method, data = df_long2)
summary(anova_results3)
## Df Sum Sq Mean Sq F value Pr(>F)
## method 1 2 2.0122 3.584 0.0584 .
## Residuals 6778 3805 0.5614
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#### for BD2 and HD2 in digital, BD1 and HD1 in hand
anova_results4 <- aov(value~method * SPP, data = df_long3)
summary(anova_results4)
## Df Sum Sq Mean Sq F value Pr(>F)
## method 1 1 0.5344 0.931 0.335
## SPP 2 2 0.8128 1.416 0.243
## method:SPP 2 1 0.5398 0.941 0.390
## Residuals 6774 3888 0.5739
anova_results5 <- aov(value~method + SPP, data = df_long3)
summary(anova_results5)
## Df Sum Sq Mean Sq F value Pr(>F)
## method 1 1 0.5344 0.931 0.335
## SPP 2 2 0.8128 1.416 0.243
## Residuals 6776 3889 0.5739
anova_results6 <- aov(value~method, data = df_long3)
summary(anova_results6)
## Df Sum Sq Mean Sq F value Pr(>F)
## method 1 1 0.5344 0.931 0.335
## Residuals 6778 3890 0.5740
Used to assess agreement between two measurement methods.
This analysis will only use BD1 and HD1 for both digital and manual measurements.
# requires wide data frame
library(ggplot2)
# Bland-Altman quantities: per-row mean of the two methods and their
# difference (digital minus hand).
df_ba <- df_wide %>%
  mutate(
    average = (dig + man) / 2,
    difference = dig - man
  )
#### Plot for whole dataset
# Pre-compute the bias (mean difference) and the 95% limits of agreement so
# the plotting code below reads cleanly.
ba_bias <- mean(df_ba$difference, na.rm = TRUE)
ba_loa <- ba_bias + c(-1.96, 1.96) * sd(df_ba$difference, na.rm = TRUE)
ba_full <- ggplot(df_ba, aes(x = average, y = difference)) +
  geom_point(alpha = 0.6) +
  geom_hline(yintercept = 0, linetype = "solid", color = "black") +
  geom_hline(yintercept = ba_bias, color = "red", linetype = "dashed") +  # mean difference
  geom_hline(yintercept = ba_loa, color = "blue", linetype = "dashed") +  # limits of agreement
  labs(title = "Bland-Altman Plots for All Characteristics",
       x = "Average of Digital and Hand Measurements",
       y = "Difference (Digital - Hand)") +
  theme_minimal()
ba_full
The Bland-Altman plot for the whole dataset shows distinct clusters. This is most likely caused by the differences in range across the different characteristic types. We will look at species and characteristics to see which influences the clustering.
##### Species
ba_full_SPP <- ggplot(df_ba, aes(x = average, y = difference, color=SPP)) +
geom_point(alpha = 0.6) +
geom_hline(yintercept = 0, linetype = "solid", color = "black") +
geom_hline(yintercept = mean(df_ba$difference, na.rm = TRUE), color = "red", linetype = "dashed") + # Mean line
geom_hline(yintercept = mean(df_ba$difference, na.rm = TRUE) + c(-1.96, 1.96) * sd(df_ba$difference, na.rm = TRUE),
color = "blue", linetype = "dashed") + # Limits of agreement
labs(title = "Bland-Altman Plots for All Characteristics",
x = "Average of Digital and Hand Measurements",
y = "Difference (Digital - Hand)") +
theme_minimal()
ba_full_SPP
##### Characteristics
ba_full_CR <- ggplot(df_ba, aes(x = average, y = difference, color=characteristic)) +
geom_point(alpha = 0.6) +
geom_hline(yintercept = 0, linetype = "solid", color = "black") +
geom_hline(yintercept = mean(df_ba$difference, na.rm = TRUE), color = "red", linetype = "dashed") + # Mean line
geom_hline(yintercept = mean(df_ba$difference, na.rm = TRUE) + c(-1.96, 1.96) * sd(df_ba$difference, na.rm = TRUE),
color = "blue", linetype = "dashed") + # Limits of agreement
labs(title = "Bland-Altman Plots for All Characteristics",
x = "Average of Digital and Hand Measurements",
y = "Difference (Digital - Hand)") +
theme_minimal()
ba_full_CR
We indeed see that characteristics drive the clustering. We also see that the data for Pre-dorsal length (PreDL) hover at and below the lower limit of agreement. This would suggest either a measurement bias in one/both of the measurement methods, or a biological difference, where certain species, sizes, or specific characteristics might be harder to measure accurately with one method compared to the other.
We’ll take a look at Bland-Altman plots for each characteristic now.
ba_by_char <- ggplot(df_ba, aes(x = average, y = difference)) +
geom_point(alpha = 0.6) +
geom_hline(yintercept = 0, linetype = "solid", color = "black") +
geom_hline(yintercept = mean(df_ba$difference, na.rm = TRUE), color = "red", linetype = "dashed") + # Mean line
geom_hline(yintercept = mean(df_ba$difference, na.rm = TRUE) + c(-1.96, 1.96) * sd(df_ba$difference, na.rm = TRUE),
color = "blue", linetype = "dashed") + # Limits of agreement
labs(title = "Bland-Altman Plots for All Characteristics",
x = "Average of Digital and Hand Measurements",
y = "Difference (Digital - Hand)") +
facet_wrap(~ characteristic, scales = "free") +
theme_minimal()
ba_by_char
### Check if there are species differences
ba_CR_SPP <- ggplot(df_ba, aes(x = average, y = difference, color=SPP)) +
geom_point(alpha = 0.6) +
geom_hline(yintercept = 0, linetype = "solid", color = "black") +
geom_hline(yintercept = mean(df_ba$difference, na.rm = TRUE), color = "red", linetype = "dashed") + # Mean line
geom_hline(yintercept = mean(df_ba$difference, na.rm = TRUE) + c(-1.96, 1.96) * sd(df_ba$difference, na.rm = TRUE),
color = "blue", linetype = "dashed") + # Limits of agreement
labs(title = "Bland-Altman Plots for All Characteristics",
x = "Average of Digital and Hand Measurements",
y = "Difference (Digital - Hand)") +
facet_wrap(~ characteristic, scales = "free") +
theme_minimal()
ba_CR_SPP
As before, we see that PreDL is at or below the lower limit of agreement. When colored by species, we don’t see any distinct clustering causing this positioning. Likewise, there are no distinct species clustering in any of the characteristics, with only a handful of outliers in each characteristic. This may be due to some of the individual fish being MUCH larger than the others.
This analysis will use BD2 and HD2 for digital, but BD1 and HD1 for manual measurements.
# requires wide data frame
#compute averages and differences for Bland-Altman
# Bland-Altman quantities for the second (BD2/HD2) wide data frame:
# per-specimen mean of the two methods and the digital-minus-manual difference.
df_ba2 <- mutate(
  df_wide2,
  average    = (dig + man) / 2,  # mean of digital and manual
  difference = dig - man         # digital minus manual
)
#### Plot for whole dataset
# Pooled Bland-Altman plot across every characteristic in df_ba2.
ba_full2 <- local({
  bias <- mean(df_ba2$difference, na.rm = TRUE)
  loa  <- bias + c(-1.96, 1.96) * sd(df_ba2$difference, na.rm = TRUE)
  ggplot(df_ba2, aes(x = average, y = difference)) +
    geom_point(alpha = 0.6) +
    geom_hline(yintercept = 0, linetype = "solid", color = "black") +
    geom_hline(yintercept = bias, color = "red", linetype = "dashed") +  # mean difference
    geom_hline(yintercept = loa, color = "blue", linetype = "dashed") +  # limits of agreement
    labs(
      title = "Bland-Altman Plots for All Characteristics",
      x = "Average of Digital and Hand Measurements",
      y = "Difference (Digital - Hand)"
    ) +
    theme_minimal()
})
ba_full2
The Bland-Altman plot for the whole dataset shows distinct clusters. This is most likely caused by the differences in range among the different characteristic types. We will look at species and characteristics to see which influences the clustering.
##### Species
# Pooled Bland-Altman plot colored by species, to test whether species
# explains the clustering seen in the pooled plot.
ba_full_SPP2 <- local({
  bias <- mean(df_ba2$difference, na.rm = TRUE)
  loa  <- bias + c(-1.96, 1.96) * sd(df_ba2$difference, na.rm = TRUE)
  ggplot(df_ba2, aes(x = average, y = difference, color = SPP)) +
    geom_point(alpha = 0.6) +
    geom_hline(yintercept = 0, linetype = "solid", color = "black") +
    geom_hline(yintercept = bias, color = "red", linetype = "dashed") +  # mean difference
    geom_hline(yintercept = loa, color = "blue", linetype = "dashed") +  # limits of agreement
    labs(
      title = "Bland-Altman Plots for All Characteristics",
      x = "Average of Digital and Hand Measurements",
      y = "Difference (Digital - Hand)"
    ) +
    theme_minimal()
})
ba_full_SPP2
##### Characteristics
# Pooled Bland-Altman plot colored by characteristic, to test whether the
# measurement type explains the clustering.
ba_full_CR2 <- local({
  bias <- mean(df_ba2$difference, na.rm = TRUE)
  loa  <- bias + c(-1.96, 1.96) * sd(df_ba2$difference, na.rm = TRUE)
  ggplot(df_ba2, aes(x = average, y = difference, color = characteristic)) +
    geom_point(alpha = 0.6) +
    geom_hline(yintercept = 0, linetype = "solid", color = "black") +
    geom_hline(yintercept = bias, color = "red", linetype = "dashed") +  # mean difference
    geom_hline(yintercept = loa, color = "blue", linetype = "dashed") +  # limits of agreement
    labs(
      title = "Bland-Altman Plots for All Characteristics",
      x = "Average of Digital and Hand Measurements",
      y = "Difference (Digital - Hand)"
    ) +
    theme_minimal()
})
ba_full_CR2
We indeed see that characteristics drive the clustering. We also see that the data for Pre-dorsal length (PreDL) hover at and below the lower limit of agreement. This would suggest either a measurement bias in one/both of the measurement methods, or a biological difference, where certain species, sizes, or specific characteristics might be harder to measure accurately with one method compared to the other.
We’ll take a look at Bland-Altman plots for each characteristic now.
# Faceted Bland-Altman plot of df_ba2: one panel per characteristic.
ba_by_char2 <- local({
  bias <- mean(df_ba2$difference, na.rm = TRUE)
  loa  <- bias + c(-1.96, 1.96) * sd(df_ba2$difference, na.rm = TRUE)
  ggplot(df_ba2, aes(x = average, y = difference)) +
    geom_point(alpha = 0.6) +
    geom_hline(yintercept = 0, linetype = "solid", color = "black") +
    geom_hline(yintercept = bias, color = "red", linetype = "dashed") +  # mean difference
    geom_hline(yintercept = loa, color = "blue", linetype = "dashed") +  # limits of agreement
    labs(
      title = "Bland-Altman Plots for All Characteristics",
      x = "Average of Digital and Hand Measurements",
      y = "Difference (Digital - Hand)"
    ) +
    facet_wrap(~ characteristic, scales = "free") +
    theme_minimal()
})
ba_by_char2
### Check if there are species differences
# Faceted Bland-Altman plot of df_ba2 colored by species.
ba_CR_SPP2 <- local({
  bias <- mean(df_ba2$difference, na.rm = TRUE)
  loa  <- bias + c(-1.96, 1.96) * sd(df_ba2$difference, na.rm = TRUE)
  ggplot(df_ba2, aes(x = average, y = difference, color = SPP)) +
    geom_point(alpha = 0.6) +
    geom_hline(yintercept = 0, linetype = "solid", color = "black") +
    geom_hline(yintercept = bias, color = "red", linetype = "dashed") +  # mean difference
    geom_hline(yintercept = loa, color = "blue", linetype = "dashed") +  # limits of agreement
    labs(
      title = "Bland-Altman Plots for All Characteristics",
      x = "Average of Digital and Hand Measurements",
      y = "Difference (Digital - Hand)"
    ) +
    facet_wrap(~ characteristic, scales = "free") +
    theme_minimal()
})
ba_CR_SPP2
We’re really only interested in how BD1, HD1, BD2, and HD2 compare, so let’s isolate those.
# Isolate the four body/head-depth characteristics and attach the
# Bland-Altman quantities in a single pipeline per subset.
# BD1/HD1 come from the first wide data frame; BD/HD from the second.
data_BD1 <- df_wide %>%
  filter(characteristic == "BD1") %>%
  mutate(average = (dig + man) / 2, difference = dig - man)

data_HD1 <- df_wide %>%
  filter(characteristic == "HD1") %>%
  mutate(average = (dig + man) / 2, difference = dig - man)

data_BD2 <- df_wide2 %>%
  filter(characteristic == "BD") %>%
  mutate(average = (dig + man) / 2, difference = dig - man)

data_HD2 <- df_wide2 %>%
  filter(characteristic == "HD") %>%
  mutate(average = (dig + man) / 2, difference = dig - man)
# One Bland-Altman panel per depth characteristic, colored by species.
# Each panel precomputes its own bias and limits of agreement in local().
ba_BD1 <- local({
  bias <- mean(data_BD1$difference, na.rm = TRUE)
  loa  <- bias + c(-1.96, 1.96) * sd(data_BD1$difference, na.rm = TRUE)
  ggplot(data_BD1, aes(x = average, y = difference, color = SPP)) +
    geom_point(alpha = 0.6) +
    geom_hline(yintercept = 0, linetype = "solid", color = "black") +
    geom_hline(yintercept = bias, color = "red", linetype = "dashed") +  # mean difference
    geom_hline(yintercept = loa, color = "blue", linetype = "dashed") +  # limits of agreement
    labs(title = "BD1",
         x = "Average of Digital and Hand Measurements",
         y = "Difference (Digital - Hand)") +
    theme_minimal()
})

ba_HD1 <- local({
  bias <- mean(data_HD1$difference, na.rm = TRUE)
  loa  <- bias + c(-1.96, 1.96) * sd(data_HD1$difference, na.rm = TRUE)
  ggplot(data_HD1, aes(x = average, y = difference, color = SPP)) +
    geom_point(alpha = 0.6) +
    geom_hline(yintercept = 0, linetype = "solid", color = "black") +
    geom_hline(yintercept = bias, color = "red", linetype = "dashed") +  # mean difference
    geom_hline(yintercept = loa, color = "blue", linetype = "dashed") +  # limits of agreement
    labs(title = "HD1",
         x = "Average of Digital and Hand Measurements",
         y = "Difference (Digital - Hand)") +
    theme_minimal()
})

ba_BD2 <- local({
  bias <- mean(data_BD2$difference, na.rm = TRUE)
  loa  <- bias + c(-1.96, 1.96) * sd(data_BD2$difference, na.rm = TRUE)
  ggplot(data_BD2, aes(x = average, y = difference, color = SPP)) +
    geom_point(alpha = 0.6) +
    geom_hline(yintercept = 0, linetype = "solid", color = "black") +
    geom_hline(yintercept = bias, color = "red", linetype = "dashed") +  # mean difference
    geom_hline(yintercept = loa, color = "blue", linetype = "dashed") +  # limits of agreement
    labs(title = "BD2",
         x = "Average of Digital and Hand Measurements",
         y = "Difference (Digital - Hand)") +
    theme_minimal()
})

ba_HD2 <- local({
  bias <- mean(data_HD2$difference, na.rm = TRUE)
  loa  <- bias + c(-1.96, 1.96) * sd(data_HD2$difference, na.rm = TRUE)
  ggplot(data_HD2, aes(x = average, y = difference, color = SPP)) +
    geom_point(alpha = 0.6) +
    geom_hline(yintercept = 0, linetype = "solid", color = "black") +
    geom_hline(yintercept = bias, color = "red", linetype = "dashed") +  # mean difference
    geom_hline(yintercept = loa, color = "blue", linetype = "dashed") +  # limits of agreement
    labs(title = "HD2",
         x = "Average of Digital and Hand Measurements",
         y = "Difference (Digital - Hand)") +
    theme_minimal()
})
# gridExtra supplies grid.arrange() for laying out multiple ggplots together.
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
# Show the four depth-characteristic Bland-Altman panels side by side
# so BD1 vs BD2 and HD1 vs HD2 can be compared directly.
grid.arrange(ba_BD1, ba_BD2, ba_HD1, ba_HD2)
We see that BD1 has less scatter, fewer points outside the LOA, and a smaller bias compared to BD2 (0.12 vs 0.36). In addition, BD2’s LOA do not include 0, so BD1 is more appropriate to use. In contrast, while HD1 has less scatter with fewer points outside the LOA, its bias is further from 0 than HD2’s (0.06 vs -0.008); HD2 has slightly more scatter and more points outside the LOA, but its smaller bias makes it the more appropriate choice.
I will therefore create a final dataset with BD1 and HD2, and re-run the ANOVAs and overall Bland-Altman plots.
Shows a significant difference between methods (p<0.0001) BUT when we calculate effect sizes, the effect size is SUPER small (0.00132). This suggests that while there is a detectable difference between the means of the two methods, this difference is negligible.
# car supplies Anova() with repeated-measures (idata/idesign) support.
library(car)
# Final dataset: keep BD1 for body depth and HD2 for head depth.
# Rename HD2.dig / HD1.man to a common HD.* pair so both methods line up.
# NOTE(review): HD.man is taken from HD1.man here, but the text above says
# HD2 was chosen for hand measurements — confirm this is intentional.
raw3 <- raw2 %>%
rename(HD.dig = HD2.dig, HD.man = HD1.man)
# Response matrix: 20 columns = 10 characteristics x 2 methods.
# The column order (all .dig first, then all .man, same characteristic
# order within each) MUST match the idata rows built below.
char <- cbind(raw3$SL.dig, raw3$BD1.dig, raw3$CPD.dig, raw3$CPL.dig, raw3$PreDL.dig, raw3$DbL.dig, raw3$HL.dig, raw3$HD.dig, raw3$SnL.dig, raw3$OL.dig, raw3$SL.man, raw3$BD1.man, raw3$CPD.man, raw3$CPL.man, raw3$PreDL.man, raw3$DbL.man, raw3$HL.man, raw3$HD.man, raw3$SnL.man, raw3$OL.man)
# Within-subject factors, one entry per column of `char`, in column order.
method <- factor(c(rep("dig", 10), rep("man", 10)))
characteristics <- factor(c(
"SL", "BD1", "CPD", "CPL", "PreDL", "DbL", "HL", "HD", "SnL", "OL"
))
# Doubly-multivariate repeated-measures MANOVA: intercept-only between-
# subjects model; method, characteristic, and their interaction are the
# within-subject effects.
manova_model <- Anova(
lm(char ~ 1), # No predictors, as we're only modeling within-subject effects
idata = data.frame(method = method, characteristics = characteristics),
idesign = ~ method * characteristics,
type = "III"
)
# View results
summary(manova_model, multivariate = TRUE)
##
## Type III Repeated Measures MANOVA Tests:
##
## ------------------------------------------
##
## Term: (Intercept)
##
## Response transformation matrix:
## (Intercept)
## [1,] 1
## [2,] 1
## [3,] 1
## [4,] 1
## [5,] 1
## [6,] 1
## [7,] 1
## [8,] 1
## [9,] 1
## [10,] 1
## [11,] 1
## [12,] 1
## [13,] 1
## [14,] 1
## [15,] 1
## [16,] 1
## [17,] 1
## [18,] 1
## [19,] 1
## [20,] 1
##
## Sum of squares and products for the hypothesis:
## (Intercept)
## (Intercept) 603957.1
##
## Multivariate Tests: (Intercept)
## Df test stat approx F num Df den Df Pr(>F)
## Pillai 1 0.99254 44991.22 1 338 < 2.22e-16 ***
## Wilks 1 0.00746 44991.22 1 338 < 2.22e-16 ***
## Hotelling-Lawley 1 133.11011 44991.22 1 338 < 2.22e-16 ***
## Roy 1 133.11011 44991.22 1 338 < 2.22e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## ------------------------------------------
##
## Term: method
##
## Response transformation matrix:
## method1
## [1,] 1
## [2,] 1
## [3,] 1
## [4,] 1
## [5,] 1
## [6,] 1
## [7,] 1
## [8,] 1
## [9,] 1
## [10,] 1
## [11,] -1
## [12,] -1
## [13,] -1
## [14,] -1
## [15,] -1
## [16,] -1
## [17,] -1
## [18,] -1
## [19,] -1
## [20,] -1
##
## Sum of squares and products for the hypothesis:
## method1
## method1 57.70577
##
## Multivariate Tests: method
## Df test stat approx F num Df den Df Pr(>F)
## Pillai 1 0.4467077 272.8886 1 338 < 2.22e-16 ***
## Wilks 1 0.5532923 272.8886 1 338 < 2.22e-16 ***
## Hotelling-Lawley 1 0.8073628 272.8886 1 338 < 2.22e-16 ***
## Roy 1 0.8073628 272.8886 1 338 < 2.22e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## ------------------------------------------
##
## Term: characteristics
##
## Response transformation matrix:
## characteristics1 characteristics2 characteristics3 characteristics4
## [1,] 0 0 0 0
## [2,] 1 0 0 0
## [3,] 0 1 0 0
## [4,] 0 0 1 0
## [5,] 0 0 0 0
## [6,] 0 0 0 1
## [7,] 0 0 0 0
## [8,] 0 0 0 0
## [9,] -1 -1 -1 -1
## [10,] 0 0 0 0
## [11,] 0 0 0 0
## [12,] 1 0 0 0
## [13,] 0 1 0 0
## [14,] 0 0 1 0
## [15,] 0 0 0 0
## [16,] 0 0 0 1
## [17,] 0 0 0 0
## [18,] 0 0 0 0
## [19,] -1 -1 -1 -1
## [20,] 0 0 0 0
## characteristics5 characteristics6 characteristics7 characteristics8
## [1,] 0 0 0 0
## [2,] 0 0 0 0
## [3,] 0 0 0 0
## [4,] 0 0 0 0
## [5,] 0 0 0 1
## [6,] 0 0 0 0
## [7,] 0 1 0 0
## [8,] 1 0 0 0
## [9,] -1 -1 -1 -1
## [10,] 0 0 1 0
## [11,] 0 0 0 0
## [12,] 0 0 0 0
## [13,] 0 0 0 0
## [14,] 0 0 0 0
## [15,] 0 0 0 1
## [16,] 0 0 0 0
## [17,] 0 1 0 0
## [18,] 1 0 0 0
## [19,] -1 -1 -1 -1
## [20,] 0 0 1 0
## characteristics9
## [1,] 1
## [2,] 0
## [3,] 0
## [4,] 0
## [5,] 0
## [6,] 0
## [7,] 0
## [8,] 0
## [9,] -1
## [10,] 0
## [11,] 1
## [12,] 0
## [13,] 0
## [14,] 0
## [15,] 0
## [16,] 0
## [17,] 0
## [18,] 0
## [19,] -1
## [20,] 0
##
## Sum of squares and products for the hypothesis:
## characteristics1 characteristics2 characteristics3
## characteristics1 3009.7137 1774.831 2874.5166
## characteristics2 1774.8312 1046.620 1695.1053
## characteristics3 2874.5166 1695.105 2745.3926
## characteristics4 2111.6622 1245.249 2016.8058
## characteristics5 2101.6229 1239.329 2007.2174
## characteristics6 2209.9300 1303.198 2110.6594
## characteristics7 265.9196 156.813 253.9744
## characteristics8 3539.4828 2087.237 3380.4883
## characteristics9 5237.3119 3088.448 5002.0504
## characteristics4 characteristics5 characteristics6
## characteristics1 2111.6622 2101.6229 2209.9300
## characteristics2 1245.2493 1239.3291 1303.1979
## characteristics3 2016.8058 2007.2174 2110.6594
## characteristics4 1481.5752 1474.5315 1550.5214
## characteristics5 1474.5315 1467.5212 1543.1499
## characteristics6 1550.5214 1543.1499 1622.6761
## characteristics7 186.5733 185.6863 195.2557
## characteristics8 2483.3565 2471.5500 2598.9213
## characteristics9 3674.5799 3657.1101 3845.5792
## characteristics7 characteristics8 characteristics9
## characteristics1 265.9196 3539.4828 5237.3119
## characteristics2 156.8130 2087.2366 3088.4481
## characteristics3 253.9744 3380.4883 5002.0504
## characteristics4 186.5733 2483.3565 3674.5799
## characteristics5 185.6863 2471.5500 3657.1101
## characteristics6 195.2557 2598.9213 3845.5792
## characteristics7 23.4950 312.7267 462.7363
## characteristics8 312.7267 4162.5017 6159.1822
## characteristics9 462.7363 6159.1822 9113.6360
##
## Multivariate Tests: characteristics
## Df test stat approx F num Df den Df Pr(>F)
## Pillai 1 1.000 180274.6 9 330 < 2.22e-16 ***
## Wilks 1 0.000 180274.6 9 330 < 2.22e-16 ***
## Hotelling-Lawley 1 4916.579 180274.6 9 330 < 2.22e-16 ***
## Roy 1 4916.579 180274.6 9 330 < 2.22e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## ------------------------------------------
##
## Term: method:characteristics
##
## Response transformation matrix:
## method1:characteristics1 method1:characteristics2
## [1,] 0 0
## [2,] 1 0
## [3,] 0 1
## [4,] 0 0
## [5,] 0 0
## [6,] 0 0
## [7,] 0 0
## [8,] 0 0
## [9,] -1 -1
## [10,] 0 0
## [11,] 0 0
## [12,] -1 0
## [13,] 0 -1
## [14,] 0 0
## [15,] 0 0
## [16,] 0 0
## [17,] 0 0
## [18,] 0 0
## [19,] 1 1
## [20,] 0 0
## method1:characteristics3 method1:characteristics4
## [1,] 0 0
## [2,] 0 0
## [3,] 0 0
## [4,] 1 0
## [5,] 0 0
## [6,] 0 1
## [7,] 0 0
## [8,] 0 0
## [9,] -1 -1
## [10,] 0 0
## [11,] 0 0
## [12,] 0 0
## [13,] 0 0
## [14,] -1 0
## [15,] 0 0
## [16,] 0 -1
## [17,] 0 0
## [18,] 0 0
## [19,] 1 1
## [20,] 0 0
## method1:characteristics5 method1:characteristics6
## [1,] 0 0
## [2,] 0 0
## [3,] 0 0
## [4,] 0 0
## [5,] 0 0
## [6,] 0 0
## [7,] 0 1
## [8,] 1 0
## [9,] -1 -1
## [10,] 0 0
## [11,] 0 0
## [12,] 0 0
## [13,] 0 0
## [14,] 0 0
## [15,] 0 0
## [16,] 0 0
## [17,] 0 -1
## [18,] -1 0
## [19,] 1 1
## [20,] 0 0
## method1:characteristics7 method1:characteristics8
## [1,] 0 0
## [2,] 0 0
## [3,] 0 0
## [4,] 0 0
## [5,] 0 1
## [6,] 0 0
## [7,] 0 0
## [8,] 0 0
## [9,] -1 -1
## [10,] 1 0
## [11,] 0 0
## [12,] 0 0
## [13,] 0 0
## [14,] 0 0
## [15,] 0 -1
## [16,] 0 0
## [17,] 0 0
## [18,] 0 0
## [19,] 1 1
## [20,] -1 0
## method1:characteristics9
## [1,] 1
## [2,] 0
## [3,] 0
## [4,] 0
## [5,] 0
## [6,] 0
## [7,] 0
## [8,] 0
## [9,] -1
## [10,] 0
## [11,] -1
## [12,] 0
## [13,] 0
## [14,] 0
## [15,] 0
## [16,] 0
## [17,] 0
## [18,] 0
## [19,] 1
## [20,] 0
##
## Sum of squares and products for the hypothesis:
## method1:characteristics1 method1:characteristics2
## method1:characteristics1 32.697209 30.67555
## method1:characteristics2 30.675546 28.77888
## method1:characteristics3 34.815625 32.66298
## method1:characteristics4 18.166843 17.04359
## method1:characteristics5 19.154595 17.97027
## method1:characteristics6 4.885489 4.58342
## method1:characteristics7 19.017180 17.84135
## method1:characteristics8 -28.692680 -26.91862
## method1:characteristics9 25.653473 24.06732
## method1:characteristics3 method1:characteristics4
## method1:characteristics1 34.815625 18.166843
## method1:characteristics2 32.662981 17.043590
## method1:characteristics3 37.071291 19.343852
## method1:characteristics4 19.343852 10.093650
## method1:characteristics5 20.395600 10.642453
## method1:characteristics6 5.202014 2.714418
## method1:characteristics7 20.249282 10.566104
## method1:characteristics8 -30.551647 -15.941893
## method1:characteristics9 27.315534 14.253284
## method1:characteristics5 method1:characteristics6
## method1:characteristics1 19.154595 4.8854885
## method1:characteristics2 17.970270 4.5834196
## method1:characteristics3 20.395600 5.2020139
## method1:characteristics4 10.642453 2.7144183
## method1:characteristics5 11.221096 2.8620044
## method1:characteristics6 2.862004 0.7299705
## method1:characteristics7 11.140595 2.8414723
## method1:characteristics8 -16.808672 -4.2871475
## method1:characteristics9 15.028252 3.8330412
## method1:characteristics7 method1:characteristics8
## method1:characteristics1 19.017180 -28.692680
## method1:characteristics2 17.841351 -26.918617
## method1:characteristics3 20.249282 -30.551647
## method1:characteristics4 10.566104 -15.941893
## method1:characteristics5 11.140595 -16.808672
## method1:characteristics6 2.841472 -4.287148
## method1:characteristics7 11.060673 -16.688087
## method1:characteristics8 -16.688087 25.178598
## method1:characteristics9 14.920439 -22.511613
## method1:characteristics9
## method1:characteristics1 25.653473
## method1:characteristics2 24.067324
## method1:characteristics3 27.315534
## method1:characteristics4 14.253284
## method1:characteristics5 15.028252
## method1:characteristics6 3.833041
## method1:characteristics7 14.920439
## method1:characteristics8 -22.511613
## method1:characteristics9 20.127122
##
## Multivariate Tests: method:characteristics
## Df test stat approx F num Df den Df Pr(>F)
## Pillai 1 0.98321 2147.159 9 330 < 2.22e-16 ***
## Wilks 1 0.01679 2147.159 9 330 < 2.22e-16 ***
## Hotelling-Lawley 1 58.55888 2147.159 9 330 < 2.22e-16 ***
## Roy 1 58.55888 2147.159 9 330 < 2.22e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Univariate Type III Repeated-Measures ANOVA Assuming Sphericity
##
## Sum Sq num Df Error SS den Df F value Pr(>F)
## (Intercept) 30197.9 1 226.864 338 44991.22 < 2.2e-16 ***
## method 2.9 1 3.574 338 272.89 < 2.2e-16 ***
## characteristics 3452.6 9 59.466 3042 19624.15 < 2.2e-16 ***
## method:characteristics 51.1 9 16.041 3042 1076.43 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## Mauchly Tests for Sphericity
##
## Test statistic p-value
## characteristics 0.0004014 0.0000e+00
## method:characteristics 0.0083715 5.7648e-307
##
##
## Greenhouse-Geisser and Huynh-Feldt Corrections
## for Departure from Sphericity
##
## GG eps Pr(>F[GG])
## characteristics 0.31317 < 2.2e-16 ***
## method:characteristics 0.48496 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## HF eps Pr(>F[HF])
## characteristics 0.3160713 0
## method:characteristics 0.4920936 0
# Effect size for the within-subject `method` term, hand-calculated from the
# MANOVA output above (Pillai's trace and its degrees of freedom).
# NOTE(review): this formula divides Pillai by (Pillai + den_df/num_df),
# which yields a per-observation-scaled value (0.0013); for s = 1 designs
# partial eta-squared is often reported as Pillai itself (0.447) — confirm
# which convention is intended (the two prose notes in this report disagree).
pillai_method <- 0.4467077  # Pillai's trace for `method`
num_df_method <- 1          # numerator df
den_df_method <- 338        # denominator df

eta_squared_method <-
  pillai_method / (pillai_method + den_df_method / num_df_method)

# Report the result
cat("Partial Eta-Squared (Method):", eta_squared_method, "\n")
Shows a significant difference between methods (p<0.0001) with a large effect size (0.447) BUT this is on all the data. Not sure if this makes sense to do. Would be better to run repeated measures ANOVA on one trait at a time…
# Univariate repeated-measures ANOVA on the final (BD1 + HD2) long data:
# method (dig vs man) as the within-subject factor, specimen ID as the
# error stratum (Error(ID/method)).
anova_fin <- aov(value~method + Error(ID/method), data=df_long_fin)
summary(anova_fin)
##
## Error: ID
## Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 338 226.9 0.6712
##
## Error: ID:method
## Df Sum Sq Mean Sq F value Pr(>F)
## method 1 2.885 2.8853 272.9 <2e-16 ***
## Residuals 338 3.574 0.0106
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Error: Within
## Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 6102 3579 0.5866
# Partial eta-squared for `method` from the repeated-measures ANOVA above:
# SS_effect / (SS_effect + SS_error), using the ID:method error stratum.
SS_method <- 2.885  # Sum of squares for method
SS_error  <- 3.574  # Sum of squares for residuals (error) from ID:method

eta_squared_method <- SS_method / (SS_method + SS_error)

# Report the result
cat("Partial Eta-Squared (Method):", eta_squared_method, "\n")
# Bland-Altman quantities for the final (BD1 + HD2) wide data frame.
df_ba_fin <- mutate(
  df_wide_fin,
  average    = (dig + man) / 2,  # mean of digital and manual
  difference = dig - man         # digital minus manual
)
# Pooled Bland-Altman plot for the final dataset.
ba_full_fin <- local({
  bias <- mean(df_ba_fin$difference, na.rm = TRUE)
  loa  <- bias + c(-1.96, 1.96) * sd(df_ba_fin$difference, na.rm = TRUE)
  ggplot(df_ba_fin, aes(x = average, y = difference)) +
    geom_point(alpha = 0.6) +
    geom_hline(yintercept = 0, linetype = "solid", color = "black") +
    geom_hline(yintercept = bias, color = "red", linetype = "dashed") +  # mean difference
    geom_hline(yintercept = loa, color = "blue", linetype = "dashed") +  # limits of agreement
    labs(
      title = "Bland-Altman Plots for All Characteristics",
      x = "Average of Digital and Hand Measurements",
      y = "Difference (Digital - Hand)"
    ) +
    theme_minimal()
})
ba_full_fin
##### Species
# Pooled Bland-Altman plot for the final dataset, colored by species.
ba_full_SPP_fin <- local({
  bias <- mean(df_ba_fin$difference, na.rm = TRUE)
  loa  <- bias + c(-1.96, 1.96) * sd(df_ba_fin$difference, na.rm = TRUE)
  ggplot(df_ba_fin, aes(x = average, y = difference, color = SPP)) +
    geom_point(alpha = 0.6) +
    geom_hline(yintercept = 0, linetype = "solid", color = "black") +
    geom_hline(yintercept = bias, color = "red", linetype = "dashed") +  # mean difference
    geom_hline(yintercept = loa, color = "blue", linetype = "dashed") +  # limits of agreement
    labs(
      title = "Bland-Altman Plots for All Characteristics",
      x = "Average of Digital and Hand Measurements",
      y = "Difference (Digital - Hand)"
    ) +
    theme_minimal()
})
ba_full_SPP_fin
##### Characteristics
# Pooled Bland-Altman plot for the final dataset, colored by characteristic.
ba_full_CR_fin <- local({
  bias <- mean(df_ba_fin$difference, na.rm = TRUE)
  loa  <- bias + c(-1.96, 1.96) * sd(df_ba_fin$difference, na.rm = TRUE)
  ggplot(df_ba_fin, aes(x = average, y = difference, color = characteristic)) +
    geom_point(alpha = 0.6) +
    geom_hline(yintercept = 0, linetype = "solid", color = "black") +
    geom_hline(yintercept = bias, color = "red", linetype = "dashed") +  # mean difference
    geom_hline(yintercept = loa, color = "blue", linetype = "dashed") +  # limits of agreement
    labs(
      title = "Bland-Altman Plots for All Characteristics",
      x = "Average of Digital and Hand Measurements",
      y = "Difference (Digital - Hand)"
    ) +
    theme_minimal()
})
ba_full_CR_fin
##### individual characteristics
# Faceted Bland-Altman plot for the final dataset: one panel per characteristic.
ba_by_char_fin <- local({
  bias <- mean(df_ba_fin$difference, na.rm = TRUE)
  loa  <- bias + c(-1.96, 1.96) * sd(df_ba_fin$difference, na.rm = TRUE)
  ggplot(df_ba_fin, aes(x = average, y = difference)) +
    geom_point(alpha = 0.6) +
    geom_hline(yintercept = 0, linetype = "solid", color = "black") +
    geom_hline(yintercept = bias, color = "red", linetype = "dashed") +  # mean difference
    geom_hline(yintercept = loa, color = "blue", linetype = "dashed") +  # limits of agreement
    labs(
      title = "Bland-Altman Plots for All Characteristics",
      x = "Average of Digital and Hand Measurements",
      y = "Difference (Digital - Hand)"
    ) +
    facet_wrap(~ characteristic, scales = "free") +
    theme_minimal()
})
ba_by_char_fin
NOTE: the looping below isn’t working because the structure of ‘stats’ does not contain the p-value (even though printing stats does show a p-value). Contacted the package author for more assistance.
unique_characteristics <- unique(df_long$characteristic)
# Per-characteristic Bland-Altman statistics, collected as a list of
# one-row data frames (bind with dplyr::bind_rows() afterwards if a single
# table is wanted).
# Fixes vs the original attempt:
#  * the pasted code had `$` mangled into `\( \)` escapes and comments fused
#    onto code lines — reconstructed here;
#  * `mean = mean` assigned the base `mean` function into the data.frame;
#  * blandr.statistics() prints a p-value but does not store one in a
#    `p_value` element, so it is recomputed with a one-sample t-test on the
#    paired differences (H0: bias == 0), matching the printed test.
# NOTE(review): field names `differences`, `bias`, `lowerLOA`, `upperLOA`
# assumed from the blandr output above — confirm against the installed
# blandr version.
statistics_list <- lapply(unique_characteristics, function(char) {
  # Subset the long data to this characteristic (one row per method/specimen)
  df_char <- df_long %>% filter(characteristic == char)

  # Bland-Altman statistics comparing digital vs hand values
  stats <- blandr.statistics(
    x = df_char$value[df_char$method == "dig"],
    y = df_char$value[df_char$method == "man"]
  )

  # Recompute the p-value for bias != 0 from the paired differences
  p_value <- t.test(stats$differences)$p.value

  # One-row summary for this characteristic
  data.frame(
    Characteristic  = char,
    p_value         = p_value,
    mean_difference = stats$bias,
    lower_limit     = stats$lowerLOA,
    upper_limit     = stats$upperLOA
  )
})
Nearly all characteristics (except HD2 and OL) show significant differences. HOWEVER, the bias is extremely close to zero (suggesting no practical difference between the methods) and the LOAs include 0 (which also suggests that no difference between the methods is plausible).
# blandr supplies blandr.statistics() for formal Bland-Altman output.
library(blandr)
## Warning: package 'blandr' was built under R version 4.4.2
### Whole dataset
bar_full <- blandr.statistics(df_wide$dig, df_wide$man)
bar_full
## Bland-Altman Statistics
## =======================
## t = -9.7766, df = 3389, p-value = < 2.2e-16
## alternative hypothesis: true bias is not equal to 0
##
## =======================
## Number of comparisons: 3390
## Maximum value for average measures: 4.042436
## Minimum value for average measures: 0.3834636
## Maximum value for difference in measures: 0.5562302
## Minimum value for difference in measures: -0.9342704
##
## Bias: -0.03445528
## Standard deviation of bias: 0.2051953
##
## Standard error of bias: 0.003524258
## Standard error for limits of agreement: 0.00602359
##
## Bias: -0.03445528
## Bias- upper 95% CI: -0.02754539
## Bias- lower 95% CI: -0.04136516
##
## Upper limit of agreement: 0.3677276
## Upper LOA- upper 95% CI: 0.3795378
## Upper LOA- lower 95% CI: 0.3559173
##
## Lower limit of agreement: -0.4366381
## Lower LOA- upper 95% CI: -0.4248279
## Lower LOA- lower 95% CI: -0.4484484
##
## =======================
## Derived measures:
## Mean of differences/means: -2.604907
## Point estimate of bias as proportion of lowest average: -8.985281
## Point estimate of bias as proportion of highest average -0.8523395
## Spread of data between lower and upper LoAs: 0.8043657
## Bias as proportion of LoA spread: -4.283534
##
## =======================
## Bias:
## -0.03445528 ( -0.04136516 to -0.02754539 )
## ULoA:
## 0.3677276 ( 0.3559173 to 0.3795378 )
## LLoA:
## -0.4366381 ( -0.4484484 to -0.4248279 )
# Same whole-dataset statistics for the second (BD2/HD2) wide data frame.
bar_full2 <- blandr.statistics(df_wide2$dig, df_wide2$man)
bar_full2
## Bland-Altman Statistics
## =======================
## t = -4.3957, df = 3389, p-value = 1.138e-05
## alternative hypothesis: true bias is not equal to 0
##
## =======================
## Number of comparisons: 3390
## Maximum value for average measures: 4.042436
## Minimum value for average measures: 0.3834636
## Maximum value for difference in measures: 0.746832
## Minimum value for difference in measures: -0.9342704
##
## Bias: -0.01775692
## Standard deviation of bias: 0.2352018
##
## Standard error of bias: 0.004039623
## Standard error for limits of agreement: 0.006904443
##
## Bias: -0.01775692
## Bias- upper 95% CI: -0.009836577
## Bias- lower 95% CI: -0.02567727
##
## Upper limit of agreement: 0.4432387
## Upper LOA- upper 95% CI: 0.456776
## Upper LOA- lower 95% CI: 0.4297014
##
## Lower limit of agreement: -0.4787525
## Lower LOA- upper 95% CI: -0.4652152
## Lower LOA- lower 95% CI: -0.4922898
##
## =======================
## Derived measures:
## Mean of differences/means: -2.042943
## Point estimate of bias as proportion of lowest average: -4.630667
## Point estimate of bias as proportion of highest average -0.4392629
## Spread of data between lower and upper LoAs: 0.9219912
## Bias as proportion of LoA spread: -1.925932
##
## =======================
## Bias:
## -0.01775692 ( -0.02567727 to -0.009836577 )
## ULoA:
## 0.4432387 ( 0.4297014 to 0.456776 )
## LLoA:
## -0.4787525 ( -0.4922898 to -0.4652152 )
### By characteristic
# Bland-Altman statistics for standard length (SL) only.
df_SL <- filter(df_wide, characteristic == "SL")
bar_SL <- blandr.statistics(df_SL$dig, df_SL$man)
bar_SL
## Bland-Altman Statistics
## =======================
## t = 23.922, df = 338, p-value = < 2.2e-16
## alternative hypothesis: true bias is not equal to 0
##
## =======================
## Number of comparisons: 339
## Maximum value for average measures: 4.042436
## Minimum value for average measures: 2.960844
## Maximum value for difference in measures: 0.1785351
## Minimum value for difference in measures: -0.2129669
##
## Bias: 0.05387797
## Standard deviation of bias: 0.04146884
##
## Standard error of bias: 0.002252278
## Standard error for limits of agreement: 0.003852918
##
## Bias: 0.05387797
## Bias- upper 95% CI: 0.05830822
## Bias- lower 95% CI: 0.04944772
##
## Upper limit of agreement: 0.1351569
## Upper LOA- upper 95% CI: 0.1427356
## Upper LOA- lower 95% CI: 0.1275782
##
## Lower limit of agreement: -0.02740096
## Lower LOA- upper 95% CI: -0.01982224
## Lower LOA- lower 95% CI: -0.03497967
##
## =======================
## Derived measures:
## Mean of differences/means: 1.517899
## Point estimate of bias as proportion of lowest average: 1.819683
## Point estimate of bias as proportion of highest average 1.332809
## Spread of data between lower and upper LoAs: 0.1625579
## Bias as proportion of LoA spread: 33.14387
##
## =======================
## Bias:
## 0.05387797 ( 0.04944772 to 0.05830822 )
## ULoA:
## 0.1351569 ( 0.1275782 to 0.1427356 )
## LLoA:
## -0.02740096 ( -0.03497967 to -0.01982224 )
# Bland-Altman statistics for body depth, method 1 (BD1).
df_BD1 <- filter(df_wide, characteristic == "BD1")
bar_BD1 <- blandr.statistics(df_BD1$dig, df_BD1$man)
bar_BD1
## Bland-Altman Statistics
## =======================
## t = 30.615, df = 338, p-value = < 2.2e-16
## alternative hypothesis: true bias is not equal to 0
##
## =======================
## Number of comparisons: 339
## Maximum value for average measures: 3.121966
## Minimum value for average measures: 1.748914
## Maximum value for difference in measures: 0.4951842
## Minimum value for difference in measures: -0.2418473
##
## Bias: 0.1207813
## Standard deviation of bias: 0.07263929
##
## Standard error of bias: 0.003945225
## Standard error for limits of agreement: 0.006749001
##
## Bias: 0.1207813
## Bias- upper 95% CI: 0.1285416
## Bias- lower 95% CI: 0.113021
##
## Upper limit of agreement: 0.2631543
## Upper LOA- upper 95% CI: 0.2764297
## Upper LOA- lower 95% CI: 0.249879
##
## Lower limit of agreement: -0.02159169
## Lower LOA- upper 95% CI: -0.00831636
## Lower LOA- lower 95% CI: -0.03486703
##
## =======================
## Derived measures:
## Mean of differences/means: 4.995667
## Point estimate of bias as proportion of lowest average: 6.906076
## Point estimate of bias as proportion of highest average 3.868758
## Spread of data between lower and upper LoAs: 0.284746
## Bias as proportion of LoA spread: 42.41721
##
## =======================
## Bias:
## 0.1207813 ( 0.113021 to 0.1285416 )
## ULoA:
## 0.2631543 ( 0.249879 to 0.2764297 )
## LLoA:
## -0.02159169 ( -0.03486703 to -0.00831636 )
# Bland-Altman statistics for body depth, method 2 (BD in df_wide2).
df_BD2 <- filter(df_wide2, characteristic == "BD")
bar_BD2 <- blandr.statistics(df_BD2$dig, df_BD2$man)
bar_BD2
## Bland-Altman Statistics
## =======================
## t = 52.87, df = 338, p-value = < 2.2e-16
## alternative hypothesis: true bias is not equal to 0
##
## =======================
## Number of comparisons: 339
## Maximum value for average measures: 3.163316
## Minimum value for average measures: 1.876811
## Maximum value for difference in measures: 0.746832
## Minimum value for difference in measures: -0.09538079
##
## Bias: 0.3557936
## Standard deviation of bias: 0.1239048
##
## Standard error of bias: 0.006729585
## Standard error for limits of agreement: 0.01151214
##
## Bias: 0.3557936
## Bias- upper 95% CI: 0.3690308
## Bias- lower 95% CI: 0.3425565
##
## Upper limit of agreement: 0.598647
## Upper LOA- upper 95% CI: 0.6212915
## Upper LOA- lower 95% CI: 0.5760026
##
## Lower limit of agreement: 0.1129402
## Lower LOA- upper 95% CI: 0.1355847
## Lower LOA- lower 95% CI: 0.09029573
##
## =======================
## Derived measures:
## Mean of differences/means: 14.04186
## Point estimate of bias as proportion of lowest average: 18.95735
## Point estimate of bias as proportion of highest average 11.24749
## Spread of data between lower and upper LoAs: 0.4857068
## Bias as proportion of LoA spread: 73.25275
##
## =======================
## Bias:
## 0.3557936 ( 0.3425565 to 0.3690308 )
## ULoA:
## 0.598647 ( 0.5760026 to 0.6212915 )
## LLoA:
## 0.1129402 ( 0.09029573 to 0.1355847 )
# Bland-Altman agreement statistics for caudal peduncle depth (CPD)
df_CPD <- subset(df_wide, characteristic == "CPD")
(bar_CPD <- blandr.statistics(df_CPD$dig, df_CPD$man))
## Bland-Altman Statistics
## =======================
## t = 27.399, df = 338, p-value = < 2.2e-16
## alternative hypothesis: true bias is not equal to 0
##
## =======================
## Number of comparisons: 339
## Maximum value for average measures: 2.471218
## Minimum value for average measures: 1.170403
## Maximum value for difference in measures: 0.5562302
## Minimum value for difference in measures: -0.1550393
##
## Bias: 0.101579
## Standard deviation of bias: 0.06825963
##
## Standard error of bias: 0.003707354
## Standard error for limits of agreement: 0.006342082
##
## Bias: 0.101579
## Bias- upper 95% CI: 0.1088714
## Bias- lower 95% CI: 0.09428661
##
## Upper limit of agreement: 0.2353679
## Upper LOA- upper 95% CI: 0.2478428
## Upper LOA- lower 95% CI: 0.222893
##
## Lower limit of agreement: -0.03220987
## Lower LOA- upper 95% CI: -0.01973495
## Lower LOA- lower 95% CI: -0.04468479
##
## =======================
## Derived measures:
## Mean of differences/means: 5.715623
## Point estimate of bias as proportion of lowest average: 8.678974
## Point estimate of bias as proportion of highest average 4.110483
## Spread of data between lower and upper LoAs: 0.2675778
## Bias as proportion of LoA spread: 37.96243
##
## =======================
## Bias:
## 0.101579 ( 0.09428661 to 0.1088714 )
## ULoA:
## 0.2353679 ( 0.222893 to 0.2478428 )
## LLoA:
## -0.03220987 ( -0.04468479 to -0.01973495 )
# Bland-Altman agreement statistics for caudal peduncle length (CPL)
df_CPL <- subset(df_wide, characteristic == "CPL")
(bar_CPL <- blandr.statistics(df_CPL$dig, df_CPL$man))
## Bland-Altman Statistics
## =======================
## t = 30.741, df = 338, p-value = < 2.2e-16
## alternative hypothesis: true bias is not equal to 0
##
## =======================
## Number of comparisons: 339
## Maximum value for average measures: 2.890711
## Minimum value for average measures: 1.76216
## Maximum value for difference in measures: 0.3986366
## Minimum value for difference in measures: -0.19231
##
## Bias: 0.1409026
## Standard deviation of bias: 0.08439069
##
## Standard error of bias: 0.004583473
## Standard error for limits of agreement: 0.007840837
##
## Bias: 0.1409026
## Bias- upper 95% CI: 0.1499183
## Bias- lower 95% CI: 0.1318869
##
## Upper limit of agreement: 0.3063084
## Upper LOA- upper 95% CI: 0.3217314
## Upper LOA- lower 95% CI: 0.2908854
##
## Lower limit of agreement: -0.02450314
## Lower LOA- upper 95% CI: -0.00908016
## Lower LOA- lower 95% CI: -0.03992613
##
## =======================
## Derived measures:
## Mean of differences/means: 5.963766
## Point estimate of bias as proportion of lowest average: 7.996017
## Point estimate of bias as proportion of highest average 4.874324
## Spread of data between lower and upper LoAs: 0.3308115
## Bias as proportion of LoA spread: 42.59302
##
## =======================
## Bias:
## 0.1409026 ( 0.1318869 to 0.1499183 )
## ULoA:
## 0.3063084 ( 0.2908854 to 0.3217314 )
## LLoA:
## -0.02450314 ( -0.03992613 to -0.00908016 )
# Bland-Altman agreement statistics for pre-dorsal length (PreDL)
df_PreDL <- subset(df_wide, characteristic == "PreDL")
(bar_PreDL <- blandr.statistics(df_PreDL$dig, df_PreDL$man))
## Bland-Altman Statistics
## =======================
## t = -75.204, df = 338, p-value = < 2.2e-16
## alternative hypothesis: true bias is not equal to 0
##
## =======================
## Number of comparisons: 339
## Maximum value for average measures: 3.204808
## Minimum value for average measures: 2.05628
## Maximum value for difference in measures: 0.2376452
## Minimum value for difference in measures: -0.9116021
##
## Bias: -0.4623168
## Standard deviation of bias: 0.113188
##
## Standard error of bias: 0.006147531
## Standard error for limits of agreement: 0.01051643
##
## Bias: -0.4623168
## Bias- upper 95% CI: -0.4502246
## Bias- lower 95% CI: -0.4744091
##
## Upper limit of agreement: -0.2404683
## Upper LOA- upper 95% CI: -0.2197824
## Upper LOA- lower 95% CI: -0.2611542
##
## Lower limit of agreement: -0.6841654
## Lower LOA- upper 95% CI: -0.6634795
## Lower LOA- lower 95% CI: -0.7048513
##
## =======================
## Derived measures:
## Mean of differences/means: -17.19129
## Point estimate of bias as proportion of lowest average: -22.48317
## Point estimate of bias as proportion of highest average -14.42572
## Spread of data between lower and upper LoAs: 0.4436971
## Bias as proportion of LoA spread: -104.1965
##
## =======================
## Bias:
## -0.4623168 ( -0.4744091 to -0.4502246 )
## ULoA:
## -0.2404683 ( -0.2611542 to -0.2197824 )
## LLoA:
## -0.6841654 ( -0.7048513 to -0.6634795 )
# Bland-Altman agreement statistics for dorsal-fin base length (DbL)
df_DbL <- subset(df_wide, characteristic == "DbL")
(bar_DbL <- blandr.statistics(df_DbL$dig, df_DbL$man))
## Bland-Altman Statistics
## =======================
## t = -2.8434, df = 338, p-value = 0.004736
## alternative hypothesis: true bias is not equal to 0
##
## =======================
## Number of comparisons: 339
## Maximum value for average measures: 2.786619
## Minimum value for average measures: 1.117524
## Maximum value for difference in measures: 0.2886718
## Minimum value for difference in measures: -0.9342704
##
## Bias: -0.01723212
## Standard deviation of bias: 0.1115855
##
## Standard error of bias: 0.006060494
## Standard error for limits of agreement: 0.01036754
##
## Bias: -0.01723212
## Bias- upper 95% CI: -0.005311085
## Bias- lower 95% CI: -0.02915316
##
## Upper limit of agreement: 0.2014755
## Upper LOA- upper 95% CI: 0.2218686
## Upper LOA- lower 95% CI: 0.1810825
##
## Lower limit of agreement: -0.2359398
## Lower LOA- upper 95% CI: -0.2155467
## Lower LOA- lower 95% CI: -0.2563328
##
## =======================
## Derived measures:
## Mean of differences/means: -1.003452
## Point estimate of bias as proportion of lowest average: -1.541991
## Point estimate of bias as proportion of highest average -0.6183882
## Spread of data between lower and upper LoAs: 0.4374153
## Bias as proportion of LoA spread: -3.939533
##
## =======================
## Bias:
## -0.01723212 ( -0.02915316 to -0.005311085 )
## ULoA:
## 0.2014755 ( 0.1810825 to 0.2218686 )
## LLoA:
## -0.2359398 ( -0.2563328 to -0.2155467 )
# Bland-Altman agreement statistics for head length (HL)
df_HL <- subset(df_wide, characteristic == "HL")
(bar_HL <- blandr.statistics(df_HL$dig, df_HL$man))
## Bland-Altman Statistics
## =======================
## t = -16.151, df = 338, p-value = < 2.2e-16
## alternative hypothesis: true bias is not equal to 0
##
## =======================
## Number of comparisons: 339
## Maximum value for average measures: 2.565974
## Minimum value for average measures: 1.480514
## Maximum value for difference in measures: 0.528404
## Minimum value for difference in measures: -0.7045543
##
## Bias: -0.1433821
## Standard deviation of bias: 0.163451
##
## Standard error of bias: 0.008877438
## Standard error for limits of agreement: 0.01518642
##
## Bias: -0.1433821
## Bias- upper 95% CI: -0.1259201
## Bias- lower 95% CI: -0.1608441
##
## Upper limit of agreement: 0.1769818
## Upper LOA- upper 95% CI: 0.2068536
## Upper LOA- lower 95% CI: 0.14711
##
## Lower limit of agreement: -0.463746
## Lower LOA- upper 95% CI: -0.4338742
## Lower LOA- lower 95% CI: -0.4936178
##
## =======================
## Derived measures:
## Mean of differences/means: -7.262377
## Point estimate of bias as proportion of lowest average: -9.684617
## Point estimate of bias as proportion of highest average -5.587823
## Spread of data between lower and upper LoAs: 0.6407278
## Bias as proportion of LoA spread: -22.378
##
## =======================
## Bias:
## -0.1433821 ( -0.1608441 to -0.1259201 )
## ULoA:
## 0.1769818 ( 0.14711 to 0.2068536 )
## LLoA:
## -0.463746 ( -0.4936178 to -0.4338742 )
# Bland-Altman agreement statistics for head depth (HD1, first landmark set)
df_HD1 <- subset(df_wide, characteristic == "HD1")
(bar_HD1 <- blandr.statistics(df_HD1$dig, df_HD1$man))
## Bland-Altman Statistics
## =======================
## t = 14.044, df = 338, p-value = < 2.2e-16
## alternative hypothesis: true bias is not equal to 0
##
## =======================
## Number of comparisons: 339
## Maximum value for average measures: 2.601825
## Minimum value for average measures: 1.451618
## Maximum value for difference in measures: 0.4229753
## Minimum value for difference in measures: -0.2031489
##
## Bias: 0.06017856
## Standard deviation of bias: 0.07889485
##
## Standard error of bias: 0.00428498
## Standard error for limits of agreement: 0.007330212
##
## Bias: 0.06017856
## Bias- upper 95% CI: 0.06860715
## Bias- lower 95% CI: 0.05174997
##
## Upper limit of agreement: 0.2148125
## Upper LOA- upper 95% CI: 0.229231
## Upper LOA- lower 95% CI: 0.2003939
##
## Lower limit of agreement: -0.09445534
## Lower LOA- upper 95% CI: -0.08003676
## Lower LOA- lower 95% CI: -0.1088739
##
## =======================
## Derived measures:
## Mean of differences/means: 3.007816
## Point estimate of bias as proportion of lowest average: 4.14562
## Point estimate of bias as proportion of highest average 2.312937
## Spread of data between lower and upper LoAs: 0.3092678
## Bias as proportion of LoA spread: 19.4584
##
## =======================
## Bias:
## 0.06017856 ( 0.05174997 to 0.06860715 )
## ULoA:
## 0.2148125 ( 0.2003939 to 0.229231 )
## LLoA:
## -0.09445534 ( -0.1088739 to -0.08003676 )
# Bland-Altman agreement statistics for head depth (HD, second landmark set)
df_HD2 <- subset(df_wide2, characteristic == "HD")
(bar_HD2 <- blandr.statistics(df_HD2$dig, df_HD2$man))
## Bland-Altman Statistics
## =======================
## t = -1.3693, df = 338, p-value = 0.1718
## alternative hypothesis: true bias is not equal to 0
##
## =======================
## Number of comparisons: 339
## Maximum value for average measures: 2.566481
## Minimum value for average measures: 1.391822
## Maximum value for difference in measures: 0.4478337
## Minimum value for difference in measures: -0.2905665
##
## Bias: -0.007850176
## Standard deviation of bias: 0.1055546
##
## Standard error of bias: 0.005732938
## Standard error for limits of agreement: 0.009807199
##
## Bias: -0.007850176
## Bias- upper 95% CI: 0.003426556
## Bias- lower 95% CI: -0.01912691
##
## Upper limit of agreement: 0.1990368
## Upper LOA- upper 95% CI: 0.2183276
## Upper LOA- lower 95% CI: 0.179746
##
## Lower limit of agreement: -0.2147372
## Lower LOA- upper 95% CI: -0.1954463
## Lower LOA- lower 95% CI: -0.234028
##
## =======================
## Derived measures:
## Mean of differences/means: -0.4187308
## Point estimate of bias as proportion of lowest average: -0.5640215
## Point estimate of bias as proportion of highest average -0.3058731
## Spread of data between lower and upper LoAs: 0.413774
## Bias as proportion of LoA spread: -1.897213
##
## =======================
## Bias:
## -0.007850176 ( -0.01912691 to 0.003426556 )
## ULoA:
## 0.1990368 ( 0.179746 to 0.2183276 )
## LLoA:
## -0.2147372 ( -0.234028 to -0.1954463 )
# Bland-Altman agreement statistics for snout length (SnL)
df_SnL <- subset(df_wide, characteristic == "SnL")
(bar_SnL <- blandr.statistics(df_SnL$dig, df_SnL$man))
## Bland-Altman Statistics
## =======================
## t = -22.082, df = 338, p-value = < 2.2e-16
## alternative hypothesis: true bias is not equal to 0
##
## =======================
## Number of comparisons: 339
## Maximum value for average measures: 1.547597
## Minimum value for average measures: 0.3834636
## Maximum value for difference in measures: 0.2909172
## Minimum value for difference in measures: -0.7671152
##
## Bias: -0.1897858
## Standard deviation of bias: 0.158242
##
## Standard error of bias: 0.008594523
## Standard error for limits of agreement: 0.01470244
##
## Bias: -0.1897858
## Bias- upper 95% CI: -0.1728803
## Bias- lower 95% CI: -0.2066913
##
## Upper limit of agreement: 0.1203684
## Upper LOA- upper 95% CI: 0.1492882
## Upper LOA- lower 95% CI: 0.0914486
##
## Lower limit of agreement: -0.49994
## Lower LOA- upper 95% CI: -0.4710202
## Lower LOA- lower 95% CI: -0.5288599
##
## =======================
## Derived measures:
## Mean of differences/means: -21.11482
## Point estimate of bias as proportion of lowest average: -49.49253
## Point estimate of bias as proportion of highest average -12.26326
## Spread of data between lower and upper LoAs: 0.6203085
## Bias as proportion of LoA spread: -30.59539
##
## =======================
## Bias:
## -0.1897858 ( -0.2066913 to -0.1728803 )
## ULoA:
## 0.1203684 ( 0.0914486 to 0.1492882 )
## LLoA:
## -0.49994 ( -0.5288599 to -0.4710202 )
# Bland-Altman agreement statistics for orbit length (OL)
df_OL <- subset(df_wide, characteristic == "OL")
(bar_OL <- blandr.statistics(df_OL$dig, df_OL$man))
## Bland-Altman Statistics
## =======================
## t = -1.7645, df = 338, p-value = 0.07855
## alternative hypothesis: true bias is not equal to 0
##
## =======================
## Number of comparisons: 339
## Maximum value for average measures: 1.471467
## Minimum value for average measures: 0.6198813
## Maximum value for difference in measures: 0.4116851
## Minimum value for difference in measures: -0.3320279
##
## Bias: -0.009155384
## Standard deviation of bias: 0.09553187
##
## Standard error of bias: 0.005188579
## Standard error for limits of agreement: 0.008875977
##
## Bias: -0.009155384
## Bias- upper 95% CI: 0.001050588
## Bias- lower 95% CI: -0.01936136
##
## Upper limit of agreement: 0.1780871
## Upper LOA- upper 95% CI: 0.1955462
## Upper LOA- lower 95% CI: 0.160628
##
## Lower limit of agreement: -0.1963978
## Lower LOA- upper 95% CI: -0.1789387
## Lower LOA- lower 95% CI: -0.213857
##
## =======================
## Derived measures:
## Mean of differences/means: -0.6778975
## Point estimate of bias as proportion of lowest average: -1.476958
## Point estimate of bias as proportion of highest average -0.6221944
## Spread of data between lower and upper LoAs: 0.3744849
## Bias as proportion of LoA spread: -2.444794
##
## =======================
## Bias:
## -0.009155384 ( -0.01936136 to 0.001050588 )
## ULoA:
## 0.1780871 ( 0.160628 to 0.1955462 )
## LLoA:
## -0.1963978 ( -0.213857 to -0.1789387 )
A closer look at the pre-dorsal measurement, since the values lie around the lower LOA, suggesting a difference between the manual and digital measurements.
We see that the bias is close to zero and the LOAs include zero and are not a wide range, BUT all the points are at or below the lower LOA. This suggests that while the two methods are close on average, there is a systematic difference where manual measurements consistently yielded larger values than the digital ones.
# Re-examine PreDL: compute Bland-Altman limits of agreement and keep only
# the differences that fall OUTSIDE those limits (true outliers).
df_predl <- df_wide %>%
  filter(characteristic == "PreDL")
# Separate digital and hand (manual) values
digital_values <- df_predl$dig
hand_values <- df_predl$man
# Apply the blandr.statistics function
stats <- blandr.statistics(method1 = digital_values, method2 = hand_values)
# Extract the lower and upper limits of agreement.
# BUG FIX: these two assignments were swapped (lower_limit received
# stats$upperLOA and vice versa), so the filter below kept essentially
# every row - the recorded output shows all 339 rows flagged as "outliers".
lower_limit <- stats$lowerLOA
upper_limit <- stats$upperLOA
# Calculate differences and retain only the out-of-limits rows
df_predl <- df_predl %>%
  mutate(diff = dig - man) %>%
  filter(diff < lower_limit | diff > upper_limit)
# View the outliers for PreDL
print(df_predl)
## # A tibble: 339 × 6
## ID SPP characteristic dig man diff
## <fct> <fct> <fct> <dbl> <dbl> <dbl>
## 1 AD19-001 p.lat PreDL 2.72 3.10 -0.384
## 2 AD19-002 p.lat PreDL 2.53 3.04 -0.506
## 3 AD19-003 p.lat PreDL 2.82 3.25 -0.433
## 4 AD19-004 p.lat PreDL 2.33 2.83 -0.500
## 5 AD19-005 p.lat PreDL 2.64 3.05 -0.406
## 6 AD19-006 p.lat PreDL 2.62 3.02 -0.405
## 7 AD19-007 p.lat PreDL 2.44 2.91 -0.466
## 8 AD19-008 p.lat PreDL 2.45 2.86 -0.412
## 9 AD19-009 p.lat PreDL 2.33 2.91 -0.574
## 10 AD19-010 p.lat PreDL 2.63 3.04 -0.414
## # ℹ 329 more rows
# Report the mean difference (bias) and the LoA thresholds used in the
# outlier filter above.
# NOTE(review): the recorded output shows "Lower Limit" as the LARGER of the
# two values - check that lower_limit/upper_limit were assigned from the
# matching LOA fields upstream; they appear to be swapped.
cat("Mean Difference:", mean(df_predl$diff), "\n")
## Mean Difference: -0.4623168
cat("Lower Limit:", lower_limit, "\n")
## Lower Limit: -0.2404683
cat("Upper Limit:", upper_limit, "\n")
## Upper Limit: -0.6841654
# Bland-Altman plot for the PreDL rows selected above, colored by species.
df_PreDL <- df_predl %>%
  mutate(
    average = (dig + man) / 2,
    difference = dig - man
  )
# BUG FIX: the bias and limits-of-agreement reference lines previously used
# df_ba$difference (a leftover from the all-characteristics plot), so the
# dashed lines did not correspond to the PreDL data being plotted; they now
# use df_PreDL, and the title names the subset actually shown.
ba_PreDL <- ggplot(df_PreDL, aes(x = average, y = difference, color = SPP)) +
  geom_point(alpha = 0.6) +
  geom_hline(yintercept = 0, linetype = "solid", color = "black") +
  geom_hline(yintercept = mean(df_PreDL$difference, na.rm = TRUE),
             color = "red", linetype = "dashed") + # bias (mean difference)
  geom_hline(yintercept = mean(df_PreDL$difference, na.rm = TRUE) +
               c(-1.96, 1.96) * sd(df_PreDL$difference, na.rm = TRUE),
             color = "blue", linetype = "dashed") + # 95% limits of agreement
  labs(title = "Bland-Altman Plot for PreDL",
       x = "Average of Digital and Hand Measurements",
       y = "Difference (Digital - Hand)") +
  theme_minimal()
ba_PreDL
# Bland-Altman statistics for the PreDL subset plotted above
library(blandr)
with(df_PreDL, blandr.statistics(dig, man))
## Bland-Altman Statistics
## =======================
## t = -75.204, df = 338, p-value = < 2.2e-16
## alternative hypothesis: true bias is not equal to 0
##
## =======================
## Number of comparisons: 339
## Maximum value for average measures: 3.204808
## Minimum value for average measures: 2.05628
## Maximum value for difference in measures: 0.2376452
## Minimum value for difference in measures: -0.9116021
##
## Bias: -0.4623168
## Standard deviation of bias: 0.113188
##
## Standard error of bias: 0.006147531
## Standard error for limits of agreement: 0.01051643
##
## Bias: -0.4623168
## Bias- upper 95% CI: -0.4502246
## Bias- lower 95% CI: -0.4744091
##
## Upper limit of agreement: -0.2404683
## Upper LOA- upper 95% CI: -0.2197824
## Upper LOA- lower 95% CI: -0.2611542
##
## Lower limit of agreement: -0.6841654
## Lower LOA- upper 95% CI: -0.6634795
## Lower LOA- lower 95% CI: -0.7048513
##
## =======================
## Derived measures:
## Mean of differences/means: -17.19129
## Point estimate of bias as proportion of lowest average: -22.48317
## Point estimate of bias as proportion of highest average -14.42572
## Spread of data between lower and upper LoAs: 0.4436971
## Bias as proportion of LoA spread: -104.1965
##
## =======================
## Bias:
## -0.4623168 ( -0.4744091 to -0.4502246 )
## ULoA:
## -0.2404683 ( -0.2611542 to -0.2197824 )
## LLoA:
## -0.6841654 ( -0.7048513 to -0.6634795 )
Significant (p&lt;0.001) with a large effect size (0.943), meaning there is a significant difference in pre-dorsal measurements between the digital and manual methods. Based on the BA plot, it appears the manual measurements were higher than the digital measurements.
# Repeated-measures ANOVA: does measurement method affect the PreDL value,
# with specimen ID as the within-subject error stratum?
df_predl2 <- subset(df_long, characteristic == "PreDL")
anova_predl <- aov(value ~ method + Error(ID/method), data = df_predl2)
summary(anova_predl)
##
## Error: ID
## Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 338 28.1 0.08313
##
## Error: ID:method
## Df Sum Sq Mean Sq F value Pr(>F)
## method 1 36.23 36.23 5656 <2e-16 ***
## Residuals 338 2.17 0.01
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Partial eta-squared for the method effect, using the sums of squares
# reported in the ID:method stratum of the ANOVA table above.
SS_method <- 36.23 # sum of squares for method
SS_error <- 2.17   # residual sum of squares from the ID:method stratum
# Partial eta-squared = SS_effect / (SS_effect + SS_error)
eta_squared_method <- SS_method / sum(SS_method, SS_error)
cat("Partial Eta-Squared (Method):", eta_squared_method, "\n")
In this analysis, I will compare the principal components after centering and scaling the data. A PCA will help us determine which methodology influences the variation in our data the most, without worrying about differences in scales/measurements.
library(stringr)
# Standardize the 22 measurement columns (z-scores), then run a PCA on them.
# scale() defaults to center = TRUE, scale = TRUE.
z.scores <- raw2
z.scores[, 3:24] <- scale(z.scores[, 3:24])
PCA <- prcomp(z.scores[, 3:24])
summary(PCA)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 4.182 1.21217 0.84996 0.7357 0.56329 0.5180 0.45818
## Proportion of Variance 0.795 0.06679 0.03284 0.0246 0.01442 0.0122 0.00954
## Cumulative Proportion 0.795 0.86180 0.89463 0.9192 0.93366 0.9458 0.95540
## PC8 PC9 PC10 PC11 PC12 PC13 PC14
## Standard deviation 0.42037 0.41587 0.33673 0.32661 0.30076 0.28121 0.25212
## Proportion of Variance 0.00803 0.00786 0.00515 0.00485 0.00411 0.00359 0.00289
## Cumulative Proportion 0.96343 0.97129 0.97644 0.98129 0.98540 0.98900 0.99189
## PC15 PC16 PC17 PC18 PC19 PC20 PC21
## Standard deviation 0.22165 0.19780 0.16764 0.16404 0.12458 0.10221 0.07463
## Proportion of Variance 0.00223 0.00178 0.00128 0.00122 0.00071 0.00047 0.00025
## Cumulative Proportion 0.99412 0.99590 0.99718 0.99840 0.99911 0.99958 0.99983
## PC22
## Standard deviation 0.06048
## Proportion of Variance 0.00017
## Cumulative Proportion 1.00000
(loadings <- PCA$rotation[, 1:5])
## PC1 PC2 PC3 PC4 PC5
## SL.dig -0.2347778 -0.096996721 0.01693687 -0.006176149 0.062160581
## BD1.dig -0.2284302 0.040438320 0.11828459 -0.082195318 -0.020081064
## BD2.dig -0.2265082 -0.204230055 0.04686845 -0.114485959 0.014698263
## CPD.dig -0.2331491 0.027815265 0.02843859 0.060726336 0.028640808
## CPL.dig -0.2233653 -0.122526759 -0.01765293 0.051407052 0.087380963
## PreDL.dig -0.1795926 -0.455754637 0.38328152 -0.035114502 0.036990294
## DbL.dig -0.1792735 0.492804564 0.15433728 0.093722366 0.132645722
## HL.dig -0.1881629 0.037841635 -0.69302734 -0.129545756 0.040276382
## HD1.dig -0.2314303 0.092691464 -0.15121241 -0.074290038 -0.049191518
## HD2.dig -0.2202611 0.126740445 -0.37372814 -0.114658545 -0.058170881
## SnL.dig -0.1887660 -0.209724253 -0.23206376 0.438310297 0.299520788
## OL.dig -0.2176019 -0.051613729 -0.08223996 -0.187395317 -0.246012805
## SL.man -0.2346467 -0.066007194 0.05419313 -0.013410667 0.043012548
## BD1.man -0.2233880 0.137447421 0.15671013 0.048596434 -0.036769031
## CPD.man -0.2275129 0.051199268 0.08915917 0.048228291 0.001670173
## CPL.man -0.2229489 -0.084361388 0.02254979 0.033420529 0.088728475
## PreDL.man -0.2160456 -0.280047719 0.02145309 -0.055310926 0.063545028
## DbL.man -0.1679576 0.536687777 0.19654399 0.059968129 0.115261547
## HL.man -0.2113179 0.033215809 0.05468238 -0.198535844 0.394539908
## HD1.man -0.2253717 0.081537288 0.14871885 -0.052250635 0.089215660
## SnL.man -0.1857245 -0.004219492 -0.02498576 0.725115394 -0.468553383
## OL.man -0.2025594 0.030649519 0.08639455 -0.347119680 -0.631563787
# Dot charts of the variables sorted by their loading on PC1-PC3
for (pc in 1:3) {
  dotchart(sort(loadings[, pc]),
           main = paste0("Loadings Plot for PC", pc),
           xlab = "Variable Loadings", cex = 1.5, col = "red")
}
# Varimax rotation of the first three PCs of the combined PCA
VM_PCA <- varimax(PCA$rotation[, 1:3])
VM_PCA
## $loadings
##
## Loadings:
## PC1 PC2 PC3
## SL.dig -0.116 -0.220
## BD1.dig -0.235 -0.112
## BD2.dig -0.302
## CPD.dig -0.190 -0.120
## CPL.dig -0.231
## PreDL.dig -0.493 0.376
## DbL.dig -0.469 0.276
## HL.dig 0.168 -0.698
## HD1.dig -0.142 -0.248
## HD2.dig -0.449
## SnL.dig -0.267 -0.231
## OL.dig -0.167 -0.147
## SL.man -0.151 -0.197
## BD1.man -0.303
## CPD.man -0.227 -0.101
## CPL.man -0.118 -0.203
## PreDL.man -0.354
## DbL.man -0.505 0.315
## HL.man -0.190 -0.104
## HD1.man -0.270
## SnL.man -0.115 -0.114
## OL.man -0.197 -0.102
##
## PC1 PC2 PC3
## SS loadings 1.000 1.000 1.000
## Proportion Var 0.045 0.045 0.045
## Cumulative Var 0.045 0.091 0.136
##
## $rotmat
## [,1] [,2] [,3]
## [1,] 0.6917396 0.60222957 0.3985170
## [2,] -0.5560565 0.79629540 -0.2381487
## [3,] -0.4607575 -0.05686105 0.8857027
library(factoextra)
# Eigenvalue / explained-variance table for the combined PCA
eig.val <- get_eigenvalue(PCA)
print(eig.val)
## eigenvalue variance.percent cumulative.variance.percent
## Dim.1 17.490167246 79.50076021 79.50076
## Dim.2 1.469344948 6.67884067 86.17960
## Dim.3 0.722436480 3.28380218 89.46340
## Dim.4 0.541231220 2.46014191 91.92354
## Dim.5 0.317290132 1.44222787 93.36577
## Dim.6 0.268321832 1.21964469 94.58542
## Dim.7 0.209931770 0.95423532 95.53965
## Dim.8 0.176710702 0.80323046 96.34288
## Dim.9 0.172949491 0.78613405 97.12902
## Dim.10 0.113385668 0.51538940 97.64441
## Dim.11 0.106671751 0.48487160 98.12928
## Dim.12 0.090458276 0.41117398 98.54045
## Dim.13 0.079078144 0.35944611 98.89990
## Dim.14 0.063562571 0.28892078 99.18882
## Dim.15 0.049128830 0.22331286 99.41213
## Dim.16 0.039123834 0.17783561 99.58997
## Dim.17 0.028104434 0.12774743 99.71772
## Dim.18 0.026908568 0.12231167 99.84003
## Dim.19 0.015519246 0.07054203 99.91057
## Dim.20 0.010446972 0.04748624 99.95806
## Dim.21 0.005570293 0.02531951 99.98337
## Dim.22 0.003657591 0.01662541 100.00000
# Individual (specimen) coordinates on the first four PCs
ind <- get_pca_ind(PCA)
head(ind$coord[, 1:4], n = 6L)
## Dim.1 Dim.2 Dim.3 Dim.4
## 1 -5.4067751 0.4298672 0.4151608 0.68096887
## 2 -5.9873007 1.2851793 -0.4499961 0.35356493
## 3 -8.8447937 0.4505076 0.3553199 0.79382477
## 4 0.3842836 1.0995520 -0.4421054 0.15666553
## 5 -3.6094489 0.6935377 0.4998015 -0.39342915
## 6 -3.9868310 0.8667640 0.2054317 -0.03320378
raw.p <- cbind(z.scores, ind$coord[,1:4])
In order to compare dimensions 1-4 between manual and digital measurements, I need to run separate PCAs on the digital-only and manual-only data sets, then perform paired t-tests on the resulting scores.
Let's create the data sets.
# Split the standardized data into digital-only and manual-only sets
dig_p_long <- select(z.scores, ID, SPP, ends_with(".dig"))
man_p_long <- select(z.scores, ID, SPP, ends_with(".man"))
Now let’s run the individual PCAs
# PCA on the 12 digital measurement columns (already z-scored above)
PCA_dig <- prcomp(dig_p_long[, 3:14])
summary(PCA_dig)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 3.1141 0.97826 0.74710 0.56729 0.41175 0.34804 0.25763
## Proportion of Variance 0.8081 0.07975 0.04651 0.02682 0.01413 0.01009 0.00553
## Cumulative Proportion 0.8081 0.88787 0.93438 0.96120 0.97533 0.98542 0.99096
## PC8 PC9 PC10 PC11 PC12
## Standard deviation 0.21796 0.19369 0.11394 0.07637 0.06855
## Proportion of Variance 0.00396 0.00313 0.00108 0.00049 0.00039
## Cumulative Proportion 0.99491 0.99804 0.99912 0.99961 1.00000
(loadings_dig <- PCA_dig$rotation[, 1:5])
## PC1 PC2 PC3 PC4 PC5
## SL.dig -0.3169909 -0.10611458 0.060604567 -0.02354075 0.18176533
## BD1.dig -0.3049862 0.01844403 0.250670038 -0.08937686 -0.10696578
## BD2.dig -0.3063687 -0.22926027 0.015047023 -0.21325192 0.09067490
## CPD.dig -0.3113160 0.02702292 0.134490705 0.06974432 0.10056312
## CPL.dig -0.3033025 -0.12560059 0.020502291 -0.01630232 0.47718294
## PreDL.dig -0.2422550 -0.64531966 0.171752342 -0.09719333 0.02497410
## DbL.dig -0.2321796 0.50569082 0.572575906 0.32709467 0.04365808
## HL.dig -0.2625901 0.30850000 -0.607101208 -0.21853627 0.16435216
## HD1.dig -0.3120389 0.17140191 0.004180317 -0.07011570 -0.05451678
## HD2.dig -0.2995263 0.29442543 -0.204803116 -0.16644442 0.01982239
## SnL.dig -0.2592646 -0.18806057 -0.382995034 0.84022275 -0.14244593
## OL.dig -0.2964414 -0.02385058 -0.014007596 -0.19509333 -0.81011359
# Dot charts of sorted loadings for PC1-PC3 (digital-only PCA)
for (pc in 1:3) {
  dotchart(sort(loadings_dig[, pc]),
           main = paste0("Loadings Plot for PC", pc),
           xlab = "Variable Loadings", cex = 1.5, col = "red")
}
# Varimax rotation of the first three PCs of the DIGITAL-only PCA.
# BUG FIX: this previously rotated PCA$rotation (the combined dig+man PCA),
# which is why the recorded output below lists .man variables; it now uses
# PCA_dig as intended. (The printed output predates this fix.)
dig.VM_PCA <- varimax(PCA_dig$rotation[, 1:3])
dig.VM_PCA
## $loadings
##
## Loadings:
## PC1 PC2 PC3
## SL.dig -0.116 -0.220
## BD1.dig -0.235 -0.112
## BD2.dig -0.302
## CPD.dig -0.190 -0.120
## CPL.dig -0.231
## PreDL.dig -0.493 0.376
## DbL.dig -0.469 0.276
## HL.dig 0.168 -0.698
## HD1.dig -0.142 -0.248
## HD2.dig -0.449
## SnL.dig -0.267 -0.231
## OL.dig -0.167 -0.147
## SL.man -0.151 -0.197
## BD1.man -0.303
## CPD.man -0.227 -0.101
## CPL.man -0.118 -0.203
## PreDL.man -0.354
## DbL.man -0.505 0.315
## HL.man -0.190 -0.104
## HD1.man -0.270
## SnL.man -0.115 -0.114
## OL.man -0.197 -0.102
##
## PC1 PC2 PC3
## SS loadings 1.000 1.000 1.000
## Proportion Var 0.045 0.045 0.045
## Cumulative Var 0.045 0.091 0.136
##
## $rotmat
## [,1] [,2] [,3]
## [1,] 0.6917396 0.60222957 0.3985170
## [2,] -0.5560565 0.79629540 -0.2381487
## [3,] -0.4607575 -0.05686105 0.8857027
(dig.eig.val <- get_eigenvalue(PCA_dig))
## eigenvalue variance.percent cumulative.variance.percent
## Dim.1 9.697460424 80.81217020 80.81217
## Dim.2 0.956988897 7.97490747 88.78708
## Dim.3 0.558155419 4.65129516 93.43837
## Dim.4 0.321819658 2.68183049 96.12020
## Dim.5 0.169536257 1.41280214 97.53301
## Dim.6 0.121130526 1.00942105 98.54243
## Dim.7 0.066371341 0.55309451 99.09552
## Dim.8 0.047504771 0.39587309 99.49139
## Dim.9 0.037517676 0.31264730 99.80404
## Dim.10 0.012983044 0.10819203 99.91223
## Dim.11 0.005832920 0.04860766 99.96084
## Dim.12 0.004699067 0.03915889 100.00000
# Specimen coordinates on the first four digital-only PCs
dig.ind <- get_pca_ind(PCA_dig)
head(dig.ind$coord[, 1:4], n = 6L)
## Dim.1 Dim.2 Dim.3 Dim.4
## 1 -3.877405 -0.1360907 0.21810368 0.6037004
## 2 -4.350838 1.0993666 0.01943977 0.6087639
## 3 -6.089192 0.1481213 0.42598106 0.5018142
## 4 0.215744 0.9684140 -0.13146684 0.9619029
## 5 -2.633685 0.2573236 0.58017122 0.2108573
## 6 -3.078674 0.3356335 0.19729525 0.6931265
dig.p <- cbind(dig_p_long, dig.ind$coord[,1:4])
# PCA on the 10 manual measurement columns (already z-scored above)
PCA_man <- prcomp(man_p_long[, 3:12])
summary(PCA_man)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 2.8377 0.80736 0.67359 0.5225 0.42289 0.34999 0.30348
## Proportion of Variance 0.8052 0.06518 0.04537 0.0273 0.01788 0.01225 0.00921
## Cumulative Proportion 0.8052 0.87042 0.91580 0.9431 0.96098 0.97323 0.98244
## PC8 PC9 PC10
## Standard deviation 0.28419 0.27405 0.14043
## Proportion of Variance 0.00808 0.00751 0.00197
## Cumulative Proportion 0.99052 0.99803 1.00000
(loadings_man <- PCA_man$rotation[, 1:5])
## PC1 PC2 PC3 PC4 PC5
## SL.man -0.3448760 -0.159264090 0.028818596 0.08348653 0.17222913
## BD1.man -0.3336431 0.171772379 -0.041810129 -0.02260661 0.23746504
## CPD.man -0.3381457 0.020375133 -0.022859381 0.02968760 0.21184581
## CPL.man -0.3287681 -0.213919604 -0.004339041 0.15099603 0.37304666
## PreDL.man -0.3128472 -0.481495839 0.092930271 0.10276539 0.13586031
## DbL.man -0.2603841 0.806413535 -0.039923819 0.07691051 0.12592852
## HL.man -0.3152071 0.005916009 0.329640553 0.42089544 -0.72541972
## HD1.man -0.3367866 0.089004745 0.107331634 0.10025382 -0.06396107
## SnL.man -0.2773374 -0.093007562 -0.874263032 -0.13518487 -0.35690086
## OL.man -0.3032190 -0.014288942 0.319579808 -0.86422398 -0.19464322
# Dot charts of sorted loadings for PC1-PC3 (manual-only PCA)
for (pc in 1:3) {
  dotchart(sort(loadings_man[, pc]),
           main = paste0("Loadings Plot for PC", pc),
           xlab = "Variable Loadings", cex = 1.5, col = "red")
}
# Varimax rotation of the first three PCs of the MANUAL-only PCA.
# BUG FIX: this previously rotated PCA$rotation (the combined dig+man PCA),
# which is why the recorded output below lists .dig variables; it now uses
# PCA_man as intended. (The printed output predates this fix.)
man.VM_PCA <- varimax(PCA_man$rotation[, 1:3])
man.VM_PCA
## $loadings
##
## Loadings:
## PC1 PC2 PC3
## SL.dig -0.116 -0.220
## BD1.dig -0.235 -0.112
## BD2.dig -0.302
## CPD.dig -0.190 -0.120
## CPL.dig -0.231
## PreDL.dig -0.493 0.376
## DbL.dig -0.469 0.276
## HL.dig 0.168 -0.698
## HD1.dig -0.142 -0.248
## HD2.dig -0.449
## SnL.dig -0.267 -0.231
## OL.dig -0.167 -0.147
## SL.man -0.151 -0.197
## BD1.man -0.303
## CPD.man -0.227 -0.101
## CPL.man -0.118 -0.203
## PreDL.man -0.354
## DbL.man -0.505 0.315
## HL.man -0.190 -0.104
## HD1.man -0.270
## SnL.man -0.115 -0.114
## OL.man -0.197 -0.102
##
## PC1 PC2 PC3
## SS loadings 1.000 1.000 1.000
## Proportion Var 0.045 0.045 0.045
## Cumulative Var 0.045 0.091 0.136
##
## $rotmat
## [,1] [,2] [,3]
## [1,] 0.6917396 0.60222957 0.3985170
## [2,] -0.5560565 0.79629540 -0.2381487
## [3,] -0.4607575 -0.05686105 0.8857027
library(factoextra)
# Eigenvalue table for the manual-only PCA
man.eig.val <- get_eigenvalue(PCA_man)
print(man.eig.val)
## eigenvalue variance.percent cumulative.variance.percent
## Dim.1 8.05239960 80.5239960 80.52400
## Dim.2 0.65182332 6.5182332 87.04223
## Dim.3 0.45372921 4.5372921 91.57952
## Dim.4 0.27302458 2.7302458 94.30977
## Dim.5 0.17883881 1.7883881 96.09816
## Dim.6 0.12249607 1.2249607 97.32312
## Dim.7 0.09210306 0.9210306 98.24415
## Dim.8 0.08076290 0.8076290 99.05178
## Dim.9 0.07510089 0.7510089 99.80278
## Dim.10 0.01972155 0.1972155 100.00000
# Specimen coordinates on the first four manual-only PCs
man.ind <- get_pca_ind(PCA_man)
head(man.ind$coord[, 1:4], n = 6L)
## Dim.1 Dim.2 Dim.3 Dim.4
## 1 -3.7784207 0.6451182 -0.396593632 0.06849222
## 2 -4.1217187 0.6603292 0.008902533 0.87860206
## 3 -6.4466569 0.1376030 -0.378551677 0.95660998
## 4 0.3309617 0.6450504 0.446021000 1.18793061
## 5 -2.4644018 0.5773132 0.605969577 0.29865214
## 6 -2.5445684 0.9381672 0.511011644 0.61730956
man.p <- cbind(man_p_long, man.ind$coord[,1:4])
Now let’s do the t-tests
(pc1 <- t.test(dig.p$Dim.1, man.p$Dim.1, paired= TRUE, alternative = "two.sided"))
##
## Paired t-test
##
## data: dig.p$Dim.1 and man.p$Dim.1
## t = 7.2114e-16, df = 338, p-value = 1
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -0.08315722 0.08315722
## sample estimates:
## mean difference
## 3.048687e-17
(pc2 <- t.test(dig.p$Dim.2, man.p$Dim.2, paired= TRUE, alternative = "two.sided"))
##
## Paired t-test
##
## data: dig.p$Dim.2 and man.p$Dim.2
## t = -2.1474e-15, df = 338, p-value = 1
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -0.07233049 0.07233049
## sample estimates:
## mean difference
## -7.896446e-17
(pc3 <- t.test(dig.p$Dim.3, man.p$Dim.3, paired= TRUE, alternative = "two.sided"))
##
## Paired t-test
##
## data: dig.p$Dim.3 and man.p$Dim.3
## t = 4.112e-17, df = 338, p-value = 1
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -0.1053799 0.1053799
## sample estimates:
## mean difference
## 2.202945e-18
library(AMR)
library(ggplot2)
library(ggfortify)
library(ggbiplot)
# Palette anchors used in the PCA plots
wes.yel <- "#E1AF00"
wes.blu <- "#78B7C5"
# PC1 vs PC2, colored by species, with 95% normal-probability ellipses
plot1 <- autoplot(PCA, data = z.scores, colour = "SPP",
                  loadings = FALSE, loadings.label = FALSE,
                  frame = TRUE, frame.type = "norm") +
  scale_color_manual(values = c("#E1AF00", "#78B7C5", "#856A9C")) +
  scale_fill_manual(values = c("#E1AF00", "#78B7C5", "#856A9C")) +
  ggtitle("PCA Plot of Morphology traits") +
  theme_classic()
#ggsave(plot1, filename = "pc1v2.png", bg="transparent", width = 180, height = 120, units = "mm", dpi = 300)
# PC2 vs PC3, same styling
plot5 <- autoplot(PCA, x = 2, y = 3, data = z.scores, colour = "SPP",
                  loadings = FALSE, loadings.label = FALSE,
                  frame = TRUE, frame.type = "norm") +
  scale_color_manual(values = c("#E1AF00", "#78B7C5", "#856A9C")) +
  scale_fill_manual(values = c("#E1AF00", "#78B7C5", "#856A9C")) +
  ggtitle("PCA Plot of Morphology traits") +
  theme_classic()
plot5
#ggsave(plot5, filename = "pc2v3.png", bg="transparent", width = 180, height = 120, units = "mm", dpi = 300)
To color points by measurement method, the digital and manual PC scores need to be combined into a single data frame.
# Digital measurements, keeping only the BD1/HD1 versions of the duplicated traits.
dig_p_long2 <- z.scores %>%
  select(ID, SPP, ends_with(".dig")) %>%
  select(-BD2.dig, -HD2.dig)
# Digital measurements, keeping only the BD2/HD2 versions.
dig_p_long3 <- z.scores %>%
  select(ID, SPP, ends_with(".dig")) %>%
  select(-BD1.dig, -HD1.dig)
# PCA on the digital BD1/HD1 trait set (columns 3:12 hold the trait columns;
# columns 1:2 are ID and SPP).
PCA_dig <- prcomp(dig_p_long2[, 3:12])
# PC scores for digital (BD1/HD1), labelled for later row-binding with the
# manual scores. Labels are taken from the SAME data frame the PCA was fit on
# (the original pulled ID/SPP from dig_p_long, which only worked by accidental
# row alignment).
PCA.scores.dig <- as.data.frame(PCA_dig$x)
PCA.scores.dig$method <- "dig"
PCA.scores.dig$ID <- dig_p_long2$ID
PCA.scores.dig$SPP <- dig_p_long2$SPP
# PCA on the digital BD2/HD2 trait set.
PCA_dig2 <- prcomp(dig_p_long3[, 3:12])
# PC scores for digital (BD2/HD2) — the original comment said BD1/HD1, a
# copy-paste error.
PCA.scores.dig2 <- as.data.frame(PCA_dig2$x)
PCA.scores.dig2$method <- "dig"
PCA.scores.dig2$ID <- dig_p_long3$ID
PCA.scores.dig2$SPP <- dig_p_long3$SPP
# Manual-measurement PC scores, labelled with method/ID/SPP in the same
# column layout as the digital score tables so rbind() lines up.
PCA.scores.man <- data.frame(PCA_man$x, method = "man",
                             ID = man_p_long$ID, SPP = man_p_long$SPP)
# Stack digital and manual scores for method-colored plotting.
PCA.comb <- rbind(PCA.scores.dig, PCA.scores.man)
PCA.comb2 <- rbind(PCA.scores.dig2, PCA.scores.man)
# PC1 vs PC2 of the combined BD/HD1 scores, colored by measurement method,
# with a 95% normal ellipse per method.
ggplot(PCA.comb, aes(x = PC1, y = PC2, color = method)) +
  geom_point(size = 3, alpha = 0.8) +
  stat_ellipse(aes(group = method), level = 0.95) +
  scale_color_manual(values = c("#E1AF00", "#78B7C5")) +
  scale_fill_manual(values = c("#E1AF00", "#78B7C5")) +
  ggtitle("PCA of Digital and Manual Measurements (BD/HD1)") +
  xlab("Principal Component 1") +
  ylab("Principal Component 2") +
  labs(color = "Method") +
  theme_minimal()
# PC2 vs PC3 of the combined BD/HD1 scores, colored by measurement method.
ggplot(data = PCA.comb, aes(x = PC2, y = PC3, color = method)) +
  geom_point(alpha = 0.8, size = 3) +
  stat_ellipse(aes(group = method), level = 0.95) +
  scale_color_manual(values=c("#E1AF00","#78B7C5"))+
  scale_fill_manual(values=c("#E1AF00","#78B7C5")) +
  labs(
    title = "PCA of Digital and Manual Measurements (BD/HD1)",
    # Fixed: the aes maps PC2 vs PC3, but the labels previously said
    # "Principal Component 1"/"Principal Component 2".
    x = "Principal Component 2",
    y = "Principal Component 3",
    color = "Method"
  ) +
  theme_minimal()
# PC1 vs PC2 of the combined BD/HD2 scores, colored by measurement method,
# with a 95% normal ellipse per method.
ggplot(PCA.comb2, aes(x = PC1, y = PC2, color = method)) +
  geom_point(size = 3, alpha = 0.8) +
  stat_ellipse(aes(group = method), level = 0.95) +
  scale_color_manual(values = c("#E1AF00", "#78B7C5")) +
  scale_fill_manual(values = c("#E1AF00", "#78B7C5")) +
  ggtitle("PCA of Digital and Manual Measurements (BD/HD2)") +
  xlab("Principal Component 1") +
  ylab("Principal Component 2") +
  labs(color = "Method") +
  theme_minimal()
# PC2 vs PC3 of the combined BD/HD2 scores, colored by measurement method.
ggplot(data = PCA.comb2, aes(x = PC2, y = PC3, color = method)) +
  geom_point(alpha = 0.8, size = 3) +
  stat_ellipse(aes(group = method), level = 0.95) +
  scale_color_manual(values=c("#E1AF00","#78B7C5"))+
  scale_fill_manual(values=c("#E1AF00","#78B7C5")) +
  labs(
    title = "PCA of Digital and Manual Measurements (BD/HD2)",
    # Fixed: the aes maps PC2 vs PC3, but the labels previously said
    # "Principal Component 1"/"Principal Component 2".
    x = "Principal Component 2",
    y = "Principal Component 3",
    color = "Method"
  ) +
  theme_minimal()